5 changes: 5 additions & 0 deletions llvm/lib/Target/ARM/ARMAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2409,6 +2409,11 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
case ARM::SEH_EpilogEnd:
ATS.emitARMWinCFIEpilogEnd();
return;

case ARM::PseudoARMInitUndefMQPR:
case ARM::PseudoARMInitUndefSPR:
case ARM::PseudoARMInitUndefDPR_VFP2:
return;
}

MCInst TmpInst;
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,20 @@
#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H
#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H

#include "ARMBaseRegisterInfo.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cstdint>

Expand Down Expand Up @@ -536,6 +540,17 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {

std::optional<RegImmPair> isAddImmediate(const MachineInstr &MI,
Register Reg) const override;

/// Maps a register class ID onto the matching PseudoARMInitUndef* opcode.
///
/// \param RegClassID ID of the register class to look up; the MQPR, SPR and
///        DPR_VFP2 class IDs are the only ones handled.
/// \returns the pseudo-instruction opcode paired with that class. Any other
///          ID reaches llvm_unreachable.
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
  switch (RegClassID) {
  case ARM::MQPRRegClassID:
    return ARM::PseudoARMInitUndefMQPR;
  case ARM::SPRRegClassID:
    return ARM::PseudoARMInitUndefSPR;
  case ARM::DPR_VFP2RegClassID:
    return ARM::PseudoARMInitUndefDPR_VFP2;
  default:
    llvm_unreachable("Unexpected register class.");
  }
}
};

/// Get the operands corresponding to the given \p Pred value. By default, the
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,17 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
unsigned SrcSubReg) const override;

int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }

/// Picks the register class to use for \p RC from a fixed candidate list.
///
/// The candidates are tried in the order MQPR, SPR, DPR_VFP2; the first one
/// whose hasSubClassEq(RC) is true is returned. If none matches, \p RC is
/// returned unchanged.
const TargetRegisterClass *
getTargetRegisterClass(const TargetRegisterClass *RC) const override {
  // Order is significant: the first matching candidate wins.
  const TargetRegisterClass *const Candidates[] = {
      &ARM::MQPRRegClass, &ARM::SPRRegClass, &ARM::DPR_VFP2RegClass};

  for (const TargetRegisterClass *Candidate : Candidates) {
    if (Candidate->hasSubClassEq(RC))
      return Candidate;
  }

  // Nothing in the list covers RC, so leave it as it is.
  return RC;
}
};

} // end namespace llvm
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -6532,3 +6532,14 @@ let isPseudo = 1 in {
let isTerminator = 1 in
def SEH_EpilogEnd : PseudoInst<(outs), (ins), NoItinerary, []>, Sched<[]>;
}


//===----------------------------------------------------------------------===//
// Pseudo Instructions for use when early-clobber is defined and Greedy Register
// Allocation is used. This ensures the constraint is used properly.
//===----------------------------------------------------------------------===//
// The three pseudos below share the same shape: flagged isCodeGenOnly and
// hasNoSchedulingInfo, exactly one register output, no inputs, no itinerary
// (NoItinerary) and an empty selection pattern list.
let isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
// Single output $vd in the MQPR register class.
def PseudoARMInitUndefMQPR : PseudoInst<(outs MQPR:$vd), (ins), NoItinerary, []>;
// Single output $sd in the SPR register class.
def PseudoARMInitUndefSPR : PseudoInst<(outs SPR:$sd), (ins), NoItinerary, []>;
// Single output $dd in the DPR_VFP2 register class.
def PseudoARMInitUndefDPR_VFP2 : PseudoInst<(outs DPR_VFP2:$dd), (ins), NoItinerary, []>;
}
8 changes: 8 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,14 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return &InstrInfo->getRegisterInfo();
}

/// Reports whether the Init Undef Pass should be used for this subtarget.
///
/// The answer is true only when the subtarget has MVE integer operations or
/// NEON; it is false otherwise. Coverage can be widened by adding further
/// pseudo instructions for the relevant register types.
bool supportsInitUndef() const override {
  if (HasMVEIntegerOps)
    return true;
  return HasNEON;
}

const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ add_llvm_target(RISCVCodeGen
RISCVMoveMerger.cpp
RISCVPushPopOptimizer.cpp
RISCVRegisterInfo.cpp
RISCVRVVInitUndef.cpp
RISCVSubtarget.cpp
RISCVTargetMachine.cpp
RISCVTargetObjectFile.cpp
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ void initializeRISCVInsertWriteVXRMPass(PassRegistry &);
FunctionPass *createRISCVRedundantCopyEliminationPass();
void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);

FunctionPass *createRISCVInitUndefPass();
void initializeRISCVInitUndefPass(PassRegistry &);
extern char &RISCVInitUndefID;

FunctionPass *createRISCVMoveMergePass();
void initializeRISCVMoveMergePass(PassRegistry &);

Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
#ifndef LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H
#define LLVM_LIB_TARGET_RISCV_RISCVINSTRINFO_H

#include "RISCV.h"
#include "RISCVRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DiagnosticInfo.h"

#define GET_INSTRINFO_HEADER
#define GET_INSTRINFO_OPERAND_ENUM
#include "RISCVGenInstrInfo.inc"
#include "RISCVGenRegisterInfo.inc"

namespace llvm {

Expand Down Expand Up @@ -262,6 +264,21 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
getSerializableMachineMemOperandTargetFlags() const override;

/// Maps a vector register class ID onto the matching PseudoRVVInitUndef*
/// opcode.
///
/// \param RegClassID one of the VR, VRM2, VRM4 or VRM8 register class IDs.
/// \returns PseudoRVVInitUndefM1/M2/M4/M8 respectively. Any other ID reaches
///          llvm_unreachable.
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
  if (RegClassID == RISCV::VRRegClassID)
    return RISCV::PseudoRVVInitUndefM1;
  if (RegClassID == RISCV::VRM2RegClassID)
    return RISCV::PseudoRVVInitUndefM2;
  if (RegClassID == RISCV::VRM4RegClassID)
    return RISCV::PseudoRVVInitUndefM4;
  if (RegClassID == RISCV::VRM8RegClassID)
    return RISCV::PseudoRVVInitUndefM8;

  llvm_unreachable("Unexpected register class.");
}

protected:
const RISCVSubtarget &STI;

Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,19 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
SmallVectorImpl<MCPhysReg> &Hints,
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;

/// Picks the register class to use for \p RC from a fixed candidate list.
///
/// The candidates are tried in the order VRM8, VRM4, VRM2, VR; the first one
/// whose hasSubClassEq(RC) is true is returned. If none matches, \p RC is
/// returned unchanged.
const TargetRegisterClass *
getTargetRegisterClass(const TargetRegisterClass *RC) const override {
  // Order is significant: the first matching candidate wins.
  const TargetRegisterClass *const Candidates[] = {
      &RISCV::VRM8RegClass, &RISCV::VRM4RegClass, &RISCV::VRM2RegClass,
      &RISCV::VRRegClass};

  for (const TargetRegisterClass *Candidate : Candidates) {
    if (Candidate->hasSubClassEq(RC))
      return Candidate;
  }

  // Nothing in the list covers RC, so leave it as it is.
  return RC;
}
};
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
};

unsigned getMinimumJumpTableEntries() const;

/// Reports whether the Init Undef Pass should be used for this subtarget;
/// the answer is exactly what hasVInstructions() returns.
bool supportsInitUndef() const override {
  return hasVInstructions();
}
};
} // End llvm namespace

Expand Down
10 changes: 1 addition & 9 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVInsertWriteVXRMPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
initializeRISCVInitUndefPass(*PR);
initializeRISCVMoveMergePass(*PR);
initializeRISCVPushPopOptPass(*PR);
}
Expand Down Expand Up @@ -400,7 +399,6 @@ class RISCVPassConfig : public TargetPassConfig {
bool addRegAssignAndRewriteOptimized() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
void addOptimizedRegAlloc() override;
void addFastRegAlloc() override;
};
} // namespace
Expand Down Expand Up @@ -575,14 +573,8 @@ void RISCVPassConfig::addPreRegAlloc() {
addPass(createRISCVInsertWriteVXRMPass());
}

// Requests that RISCVInitUndefID be inserted relative to DetectDeadLanesID
// via insertPass(), then defers to the base-class
// TargetPassConfig::addOptimizedRegAlloc().
// NOTE(review): this file's hunk summary lists 9 deletions and 1 addition, so
// this override is presumably on the removed side of the diff; confirm.
void RISCVPassConfig::addOptimizedRegAlloc() {
insertPass(&DetectDeadLanesID, &RISCVInitUndefID);

TargetPassConfig::addOptimizedRegAlloc();
}

// Adds the init-undef pass ahead of the base-class fast register allocation
// setup, then defers to TargetPassConfig::addFastRegAlloc().
// NOTE(review): both the target-specific createRISCVInitUndefPass() call and
// the InitUndefID registration appear here. This file's hunk summary lists
// 1 addition and 9 deletions, so the first call is presumably the removed
// line and only addPass(&InitUndefID) should remain; confirm.
void RISCVPassConfig::addFastRegAlloc() {
addPass(createRISCVInitUndefPass());
addPass(&InitUndefID);
TargetPassConfig::addFastRegAlloc();
}

Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@
; CHECK-NEXT: AArch64 MI Peephole Optimization pass
; CHECK-NEXT: AArch64 Dead register definitions
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
Expand Down
4 changes: 4 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@
; GCN-O1-NEXT: Register Usage Information Propagation
; GCN-O1-NEXT: Detect Dead Lanes
; GCN-O1-NEXT: Remove dead machine instructions
; GCN-O1-NEXT: Init Undef Pass
; GCN-O1-NEXT: Process Implicit Definitions
; GCN-O1-NEXT: Remove unreachable machine basic blocks
; GCN-O1-NEXT: Live Variable Analysis
Expand Down Expand Up @@ -618,6 +619,7 @@
; GCN-O1-OPTS-NEXT: Register Usage Information Propagation
; GCN-O1-OPTS-NEXT: Detect Dead Lanes
; GCN-O1-OPTS-NEXT: Remove dead machine instructions
; GCN-O1-OPTS-NEXT: Init Undef Pass
; GCN-O1-OPTS-NEXT: Process Implicit Definitions
; GCN-O1-OPTS-NEXT: Remove unreachable machine basic blocks
; GCN-O1-OPTS-NEXT: Live Variable Analysis
Expand Down Expand Up @@ -920,6 +922,7 @@
; GCN-O2-NEXT: Register Usage Information Propagation
; GCN-O2-NEXT: Detect Dead Lanes
; GCN-O2-NEXT: Remove dead machine instructions
; GCN-O2-NEXT: Init Undef Pass
; GCN-O2-NEXT: Process Implicit Definitions
; GCN-O2-NEXT: Remove unreachable machine basic blocks
; GCN-O2-NEXT: Live Variable Analysis
Expand Down Expand Up @@ -1235,6 +1238,7 @@
; GCN-O3-NEXT: Register Usage Information Propagation
; GCN-O3-NEXT: Detect Dead Lanes
; GCN-O3-NEXT: Remove dead machine instructions
; GCN-O3-NEXT: Init Undef Pass
; GCN-O3-NEXT: Process Implicit Definitions
; GCN-O3-NEXT: Remove unreachable machine basic blocks
; GCN-O3-NEXT: Live Variable Analysis
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/ARM/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@
; CHECK-NEXT: ARM pre- register allocation load / store optimization pass
; CHECK-NEXT: ARM A15 S->D optimizer
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/LoongArch/opt-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: LoongArch Pre-RA pseudo instruction expansion pass
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/PowerPC/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Modulo Software Pipelining
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
; CHECK-NEXT: RISC-V Insert VSETVLI pass
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: RISC-V init undef pass
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Eliminate PHI nodes for register allocation
; CHECK-NEXT: Two-Address instruction pass
; CHECK-NEXT: Fast Register Allocator
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
; CHECK-NEXT: RISC-V Insert Read/Write CSR Pass
; CHECK-NEXT: RISC-V Insert Write VXRM Pass
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: RISC-V init undef pass
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=riscv-init-undef -o - %s | FileCheck %s --check-prefix=MIR
# RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -run-pass=init-undef -o - %s | FileCheck %s --check-prefix=MIR
...
---
name: vrgather_all_undef
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=riscv-init-undef -o - | FileCheck %s
# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=init-undef -o - | FileCheck %s

...
---
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass riscv-init-undef -run-pass machineverifier %s -o - | FileCheck %s
# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass init-undef -run-pass machineverifier %s -o - | FileCheck %s

--- |
source_filename = "<stdin>"
Expand Down
11 changes: 11 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-intrinsics/vcaddq.ll
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,17 @@ entry:
ret <4 x i32> %0
}

; Calls @llvm.arm.mve.vcaddq.v4i32 (i32 0, i32 1) with both vector operands
; undef and expects a single vhcadd.s32 ... #270 followed directly by bx lr.
; NOTE(review): the negative directive below sits between two directives that
; must match on consecutive output lines, so the text it scans appears to be
; empty and it may never fire. Confirm that this test really rejects a
; destination register that is reused as the last source operand.
define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
; CHECK-NEXT: bx lr
entry:
%0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
; CHECK: @ %bb.0: @ %entry
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-laneinterleaving-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -373,13 +373,13 @@ define arm_aapcs_vfpcc void @mul_i32(ptr %A, ptr %B, i64 %C, ptr %D) {
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: ldr.w lr, [sp, #20]
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: vmov.f32 s14, s5
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: vmov r5, s4
; CHECK-NEXT: vmov.f32 s4, s6
; CHECK-NEXT: vmov.f32 s6, s7
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s14
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: smull r12, r3, r1, r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.f32 s0, s2
Expand Down
114 changes: 57 additions & 57 deletions llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -222,88 +222,88 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: vldrw.u32 q1, [r4]
; CHECK-NEXT: .LBB1_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q4, [r5], #16
; CHECK-NEXT: vldrw.u32 q3, [r0], #16
; CHECK-NEXT: vldrw.u32 q3, [r5], #16
; CHECK-NEXT: vldrw.u32 q2, [r0], #16
; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: vmov.f32 s8, s14
; CHECK-NEXT: vmov.f32 s16, s10
; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: vmov.f32 s20, s18
; CHECK-NEXT: vmov.f32 s20, s14
; CHECK-NEXT: vmov.f32 s18, s11
; CHECK-NEXT: vmov.f32 s22, s15
; CHECK-NEXT: mov.w r8, #0
; CHECK-NEXT: vmov.f32 s10, s15
; CHECK-NEXT: vmov.f32 s22, s19
; CHECK-NEXT: vmullb.s32 q6, q5, q2
; CHECK-NEXT: vmov.f32 s18, s17
; CHECK-NEXT: vmullb.s32 q6, q5, q4
; CHECK-NEXT: vmov.f32 s14, s13
; CHECK-NEXT: vmov r4, r7, d12
; CHECK-NEXT: asrl r4, r7, #31
; CHECK-NEXT: vmov.f32 s14, s13
; CHECK-NEXT: vmov.f32 s10, s9
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
; CHECK-NEXT: sbcs.w r5, r2, r7
; CHECK-NEXT: csetm r5, lt
; CHECK-NEXT: bfi r8, r5, #0, #8
; CHECK-NEXT: vmov r10, r5, d13
; CHECK-NEXT: asrl r10, r5, #31
; CHECK-NEXT: vmov r6, s18
; CHECK-NEXT: vmov r6, s14
; CHECK-NEXT: rsbs.w r3, r10, #-2147483648
; CHECK-NEXT: vmov q2[2], q2[0], r4, r10
; CHECK-NEXT: vmov q4[2], q4[0], r4, r10
; CHECK-NEXT: sbcs.w r3, r2, r5
; CHECK-NEXT: vmov q2[3], q2[1], r7, r5
; CHECK-NEXT: vmov q4[3], q4[1], r7, r5
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r8, r3, #8, #8
; CHECK-NEXT: vmsr p0, r8
; CHECK-NEXT: mvn r8, #-2147483648
; CHECK-NEXT: vpsel q2, q2, q0
; CHECK-NEXT: vmov r3, r4, d4
; CHECK-NEXT: vpsel q4, q4, q0
; CHECK-NEXT: vmov r3, r4, d8
; CHECK-NEXT: subs.w r3, r3, r8
; CHECK-NEXT: sbcs r3, r4, #0
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r4, r3, #0, #8
; CHECK-NEXT: vmov r3, r5, d5
; CHECK-NEXT: vmov r3, r5, d9
; CHECK-NEXT: subs.w r3, r3, r8
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r4, r3, #8, #8
; CHECK-NEXT: vmov r3, s12
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: vmov r4, s16
; CHECK-NEXT: vpsel q2, q2, q1
; CHECK-NEXT: vmov r4, s12
; CHECK-NEXT: vpsel q4, q4, q1
; CHECK-NEXT: smull r4, r7, r4, r3
; CHECK-NEXT: asrl r4, r7, #31
; CHECK-NEXT: rsbs.w r3, r4, #-2147483648
; CHECK-NEXT: sbcs.w r3, r2, r7
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r5, r3, #0, #8
; CHECK-NEXT: vmov r3, s14
; CHECK-NEXT: vmov r3, s10
; CHECK-NEXT: smull r6, r3, r6, r3
; CHECK-NEXT: asrl r6, r3, #31
; CHECK-NEXT: rsbs.w r1, r6, #-2147483648
; CHECK-NEXT: vmov q3[2], q3[0], r4, r6
; CHECK-NEXT: vmov q2[2], q2[0], r4, r6
; CHECK-NEXT: sbcs.w r1, r2, r3
; CHECK-NEXT: vmov q3[3], q3[1], r7, r3
; CHECK-NEXT: vmov q2[3], q2[1], r7, r3
; CHECK-NEXT: csetm r1, lt
; CHECK-NEXT: bfi r5, r1, #8, #8
; CHECK-NEXT: vmsr p0, r5
; CHECK-NEXT: ldrd r5, r2, [sp, #8] @ 8-byte Folded Reload
; CHECK-NEXT: vpsel q3, q3, q0
; CHECK-NEXT: vmov r1, r3, d6
; CHECK-NEXT: vpsel q2, q2, q0
; CHECK-NEXT: vmov r1, r3, d4
; CHECK-NEXT: subs.w r1, r1, r8
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: csetm r1, lt
; CHECK-NEXT: bfi r3, r1, #0, #8
; CHECK-NEXT: vmov r1, r4, d7
; CHECK-NEXT: vmov r1, r4, d5
; CHECK-NEXT: subs.w r1, r1, r8
; CHECK-NEXT: sbcs r1, r4, #0
; CHECK-NEXT: csetm r1, lt
; CHECK-NEXT: bfi r3, r1, #8, #8
; CHECK-NEXT: vmsr p0, r3
; CHECK-NEXT: vpsel q3, q3, q1
; CHECK-NEXT: vmov.f32 s13, s14
; CHECK-NEXT: vmov.f32 s14, s8
; CHECK-NEXT: vmov.f32 s15, s10
; CHECK-NEXT: vstrb.8 q3, [r2], #16
; CHECK-NEXT: vpsel q2, q2, q1
; CHECK-NEXT: vmov.f32 s9, s10
; CHECK-NEXT: vmov.f32 s10, s16
; CHECK-NEXT: vmov.f32 s11, s18
; CHECK-NEXT: vstrb.8 q2, [r2], #16
; CHECK-NEXT: le lr, .LBB1_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
; CHECK-NEXT: ldrd r1, r3, [sp] @ 8-byte Folded Reload
Expand Down Expand Up @@ -462,14 +462,14 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: vcmp.u32 cs, q1, q4
; CHECK-NEXT: vstr p0, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: vpstt
; CHECK-NEXT: vldrwt.u32 q5, [r0], #16
; CHECK-NEXT: vldrwt.u32 q6, [r1], #16
; CHECK-NEXT: vmov.f32 s16, s22
; CHECK-NEXT: vmov.f32 s18, s23
; CHECK-NEXT: vmov.f32 s28, s26
; CHECK-NEXT: vmov.f32 s30, s27
; CHECK-NEXT: vmullb.s32 q0, q7, q4
; CHECK-NEXT: vmov.f32 s22, s25
; CHECK-NEXT: vldrwt.u32 q4, [r0], #16
; CHECK-NEXT: vldrwt.u32 q5, [r1], #16
; CHECK-NEXT: vmov.f32 s24, s18
; CHECK-NEXT: vmov.f32 s26, s19
; CHECK-NEXT: vmov.f32 s28, s22
; CHECK-NEXT: vmov.f32 s30, s23
; CHECK-NEXT: vmullb.s32 q0, q7, q6
; CHECK-NEXT: vmov.f32 s18, s21
; CHECK-NEXT: vmov r10, r5, d0
; CHECK-NEXT: asrl r10, r5, #31
; CHECK-NEXT: rsbs.w r7, r10, #-2147483648
Expand All @@ -483,7 +483,7 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: sbcs.w r3, r12, r7
; CHECK-NEXT: vmov q0[3], q0[1], r5, r7
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: vmov r7, s22
; CHECK-NEXT: vmov r7, s18
; CHECK-NEXT: bfi r4, r3, #8, #8
; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: vpsel q0, q0, q2
Expand All @@ -498,11 +498,11 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: csetm r3, lt
; CHECK-NEXT: bfi r4, r3, #8, #8
; CHECK-NEXT: vmov r3, s20
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: vmsr p0, r4
; CHECK-NEXT: vmov r4, s24
; CHECK-NEXT: vpsel q4, q0, q3
; CHECK-NEXT: vmov.f32 s2, s21
; CHECK-NEXT: vmov r4, s20
; CHECK-NEXT: vpsel q6, q0, q3
; CHECK-NEXT: vmov.f32 s2, s17
; CHECK-NEXT: smull r10, r5, r4, r3
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: asrl r10, r5, #31
Expand Down Expand Up @@ -536,8 +536,8 @@ define arm_aapcs_vfpcc void @ssatmul_4t_q31(ptr nocapture readonly %pSrcA, ptr n
; CHECK-NEXT: vpsel q0, q0, q3
; CHECK-NEXT: vldr p0, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmov.f32 s1, s2
; CHECK-NEXT: vmov.f32 s2, s16
; CHECK-NEXT: vmov.f32 s3, s18
; CHECK-NEXT: vmov.f32 s2, s24
; CHECK-NEXT: vmov.f32 s3, s26
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrwt.32 q0, [r2], #16
; CHECK-NEXT: le lr, .LBB2_2
Expand Down Expand Up @@ -778,34 +778,34 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: .LBB4_4: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vldrw.u32 q3, [r1], #16
; CHECK-NEXT: vmov.f32 s8, s6
; CHECK-NEXT: vmov.f32 s16, s14
; CHECK-NEXT: vmov.f32 s10, s7
; CHECK-NEXT: vmov.f32 s18, s15
; CHECK-NEXT: vmullb.u32 q5, q4, q2
; CHECK-NEXT: vldrw.u32 q2, [r1], #16
; CHECK-NEXT: vmov.f32 s12, s6
; CHECK-NEXT: vmov.f32 s16, s10
; CHECK-NEXT: vmov.f32 s14, s7
; CHECK-NEXT: vmov.f32 s18, s11
; CHECK-NEXT: vmullb.u32 q5, q4, q3
; CHECK-NEXT: vmov.f32 s6, s5
; CHECK-NEXT: vmov r10, r5, d10
; CHECK-NEXT: lsrl r10, r5, #31
; CHECK-NEXT: vmov.f32 s14, s13
; CHECK-NEXT: vmov.f32 s10, s9
; CHECK-NEXT: subs.w r6, r10, #-1
; CHECK-NEXT: vmullb.u32 q4, q3, q1
; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
; CHECK-NEXT: bfi r6, r5, #0, #8
; CHECK-NEXT: vmov r4, r5, d11
; CHECK-NEXT: lsrl r4, r5, #31
; CHECK-NEXT: subs.w r7, r4, #-1
; CHECK-NEXT: vmov q2[2], q2[0], r10, r4
; CHECK-NEXT: vmov q3[2], q3[0], r10, r4
; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: csetm r5, lo
; CHECK-NEXT: bfi r6, r5, #8, #8
; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: vpsel q3, q3, q0
; CHECK-NEXT: vmullb.u32 q4, q2, q1
; CHECK-NEXT: vmov r10, r5, d8
; CHECK-NEXT: lsrl r10, r5, #31
; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: subs.w r6, r10, #-1
; CHECK-NEXT: vpsel q2, q2, q0
; CHECK-NEXT: sbcs r5, r5, #0
; CHECK-NEXT: mov.w r6, #0
; CHECK-NEXT: csetm r5, lo
Expand All @@ -820,8 +820,8 @@ define arm_aapcs_vfpcc void @usatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no
; CHECK-NEXT: vmsr p0, r6
; CHECK-NEXT: vpsel q1, q1, q0
; CHECK-NEXT: vmov.f32 s5, s6
; CHECK-NEXT: vmov.f32 s6, s8
; CHECK-NEXT: vmov.f32 s7, s10
; CHECK-NEXT: vmov.f32 s6, s12
; CHECK-NEXT: vmov.f32 s7, s14
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: le lr, .LBB4_4
; CHECK-NEXT: @ %bb.5: @ %middle.block
Expand Down
28 changes: 24 additions & 4 deletions llvm/test/CodeGen/Thumb2/mve-vmull-splat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmullb.s32 q2, q0, q3
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.s32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.s32 q1, q0, q3
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
Expand All @@ -210,12 +215,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmullb.s32 q2, q3, q0
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.s32 q2, q3, q4
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.s32 q1, q3, q0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
Expand Down Expand Up @@ -466,12 +476,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmullb.u32 q2, q0, q3
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.u32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.u32 q1, q0, q3
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
Expand All @@ -486,12 +501,17 @@ entry:
define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmullb.u32 q2, q3, q0
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.u32 q2, q3, q4
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.u32 q1, q3, q0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/opt-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Tile Register Pre-configure
; CHECK-NEXT: Detect Dead Lanes
; CHECK-NEXT: Init Undef Pass
; CHECK-NEXT: Process Implicit Definitions
; CHECK-NEXT: Remove unreachable machine basic blocks
; CHECK-NEXT: Live Variable Analysis
Expand Down
1 change: 0 additions & 1 deletion llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ static_library("LLVMRISCVCodeGen") {
"RISCVOptWInstrs.cpp",
"RISCVPostRAExpandPseudoInsts.cpp",
"RISCVPushPopOptimizer.cpp",
"RISCVRVVInitUndef.cpp",
"RISCVRedundantCopyElimination.cpp",
"RISCVRegisterInfo.cpp",
"RISCVSubtarget.cpp",
Expand Down