Skip to content

Commit

Permalink
[SPARC] Clean up the support for disabling fsmuld and fmuls instructions.
Browse files Browse the repository at this point in the history

Summary:
Also enable no-fsmuld for sparcv7 (which doesn't have the
instruction).

The previous code which used a post-processing pass to do this was
unnecessary; disabling the instruction is entirely sufficient.

Reviewers: jacob_hansen, ekedaigle

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D35576

llvm-svn: 308661
  • Loading branch information
jyknight committed Jul 20, 2017
1 parent 04787bb commit bb76d48
Show file tree
Hide file tree
Showing 11 changed files with 63 additions and 292 deletions.
14 changes: 0 additions & 14 deletions llvm/lib/Target/Sparc/LeonFeatures.td
Expand Up @@ -52,20 +52,6 @@ def InsertNOPLoad: SubtargetFeature<
"LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction"
>;

// LEON erratum feature selected with the "fixfsmuld" feature string; enabling
// it sets the FixFSMULD subtarget attribute to "true".
def FixFSMULD : SubtargetFeature<
"fixfsmuld",
"FixFSMULD",
"true",
"LEON erratum fix: Do not use FSMULD"
>;

// LEON erratum feature selected with the "replacefmuls" feature string;
// enabling it sets the ReplaceFMULS subtarget attribute to "true".
def ReplaceFMULS : SubtargetFeature<
"replacefmuls",
"ReplaceFMULS",
"true",
"LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions"
>;

def DetectRoundChange : SubtargetFeature<
"detectroundchange",
"DetectRoundChange",
Expand Down
209 changes: 0 additions & 209 deletions llvm/lib/Target/Sparc/LeonPasses.cpp
Expand Up @@ -24,39 +24,6 @@ using namespace llvm;
// Constructs the common base of the LEON passes by forwarding the derived
// pass's ID (taken by reference) to the MachineFunctionPass constructor.
LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID)
: MachineFunctionPass(ID) {}

// Looks up the register stored in one operand of MI.
//
// OperandIndex selects the operand; passing LAST_OPERAND selects the final
// operand of MI. When the selected operand exists and is a register, its
// register number is returned. Otherwise a negative placeholder is returned.
int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr &MI,
                                                   int OperandIndex) {
  const unsigned OperandCount = MI.getNumOperands();
  if (OperandCount > 0) {
    const int Wanted =
        OperandIndex == LAST_OPERAND ? (int)(OperandCount - 1) : OperandIndex;

    // The unsigned comparison also rejects any negative index.
    if ((unsigned)Wanted < OperandCount && MI.getOperand(Wanted).isReg())
      return (int)MI.getOperand(Wanted).getReg();
  }

  // Every miss hands back a new value (-20, -30, ...), so two failed lookups
  // never compare equal to each other.
  static int NotFoundIndex = -10;
  NotFoundIndex -= 10;
  return NotFoundIndex;
}

// Scans SP::F0 through SP::F31 in ascending order and returns the first
// register that MRI does not report as used and that is not present in the
// pass-local UsedRegisters list. Returns -1 when every candidate is taken.
int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) {
  int Candidate = SP::F0;
  while (Candidate <= SP::F31) {
    const bool Taken = MRI.isPhysRegUsed(Candidate) ||
                       is_contained(UsedRegisters, Candidate);
    if (!Taken)
      return Candidate;
    ++Candidate;
  }

  return -1;
}

//*****************************************************************************
//**** InsertNOPLoad pass
//*****************************************************************************
Expand Down Expand Up @@ -93,182 +60,6 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
return Modified;
}

//*****************************************************************************
//**** FixFSMULD pass
//*****************************************************************************
// This pass fixes the incorrectly working FSMULD instruction that exists for
// some earlier versions of the LEON processor line.
//
// The pass should convert the FSMULD operands to double precision in scratch
// registers, then calculate the result with the FMULD instruction. Therefore,
// the pass should replace operations of the form:
// fsmuld %f20,%f21,%f8
// with the sequence:
// fstod %f20,%f0
// fstod %f21,%f2
// fmuld %f0,%f2,%f8
//
char FixFSMULD::ID = 0;

FixFSMULD::FixFSMULD() : LEONMachineFunctionPass(ID) {}

// Replaces every three-operand FSMULD in MF with two FSTOD conversions into
// scratch registers followed by an FMULD of those scratch registers.
//
// Returns true when at least one instruction was rewritten. If two free
// scratch registers cannot be found, a diagnostic is printed to errs() and the
// FSMULD is left in place.
bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  const DebugLoc DL;
  const int UNASSIGNED_INDEX = -1;

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF) {
    for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE;) {
      // Step the loop iterator past the current instruction before it can be
      // erased. This keeps the iterator valid and makes sure the instruction
      // that follows a rewritten FSMULD is still visited.
      const auto CurI = MBBI++;
      MachineInstr &MI = *CurI;

      if (MI.getOpcode() != SP::FSMULD || MI.getNumOperands() != 3)
        continue;

      // MachineInstr operands list the result first: (outs $rd), then
      // (ins $rs1, $rs2). This is the reverse of the assembly syntax
      // "fsmuld %rs1,%rs2,%rd".
      const int DestReg = MI.getOperand(0).getReg();
      const int Src1Reg = MI.getOperand(1).getReg();
      const int Src2Reg = MI.getOperand(2).getReg();

      clearUsedRegisterList();
      // The destination is about to be written, so it must not be handed out
      // as a scratch register.
      markRegisterUsed(DestReg);

      // Only record a scratch register as used once it is known to be valid.
      const int Scratch1Reg = getUnusedFPRegister(MF.getRegInfo());
      int Scratch2Reg = UNASSIGNED_INDEX;
      if (Scratch1Reg != UNASSIGNED_INDEX) {
        markRegisterUsed(Scratch1Reg);
        Scratch2Reg = getUnusedFPRegister(MF.getRegInfo());
      }

      if (Scratch2Reg == UNASSIGNED_INDEX) {
        errs() << "Cannot allocate free scratch registers for the FixFSMULD "
                  "pass."
               << "\n";
        continue;
      }

      // NOTE(review): getUnusedFPRegister hands out registers from the
      // SP::F0..SP::F31 range, while FSTOD/FMULD produce double-precision
      // values - confirm the scratch registers are valid (and non-overlapping)
      // double-precision registers.

      // fstod %rs1, %scratch1
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), Scratch1Reg).addReg(Src1Reg);

      // fstod %rs2, %scratch2
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), Scratch2Reg).addReg(Src2Reg);

      // fmuld %scratch1, %scratch2, %rd
      BuildMI(MBB, CurI, DL, TII.get(SP::FMULD), DestReg)
          .addReg(Scratch1Reg)
          .addReg(Scratch2Reg);

      MI.eraseFromParent();
      Modified = true;
    }
  }

  return Modified;
}

//*****************************************************************************
//**** ReplaceFMULS pass
//*****************************************************************************
// This pass fixes the incorrectly working FMULS instruction that exists for
// some earlier versions of the LEON processor line.
//
// This pass converts the FMULS operands to double precision in scratch
// registers, then calculates the result with the FMULD instruction.
// The pass should replace operations of the form:
// fmuls %f20,%f21,%f8
// with the sequence:
// fstod %f20,%f0
// fstod %f21,%f2
// fmuld %f0,%f2,%f8
//
char ReplaceFMULS::ID = 0;

ReplaceFMULS::ReplaceFMULS() : LEONMachineFunctionPass(ID) {}

// Replaces every three-operand FMULS in MF with two FSTOD conversions into
// scratch registers followed by an FMULD of those scratch registers.
//
// Returns true when at least one instruction was rewritten. If two free
// scratch registers cannot be found, a diagnostic is printed to errs() and the
// FMULS is left in place.
bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<SparcSubtarget>();
  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  const DebugLoc DL;
  const int UNASSIGNED_INDEX = -1;

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF) {
    for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE;) {
      // Step the loop iterator past the current instruction before it can be
      // erased. This keeps the iterator valid and makes sure the instruction
      // that follows a rewritten FMULS is still visited.
      const auto CurI = MBBI++;
      MachineInstr &MI = *CurI;

      if (MI.getOpcode() != SP::FMULS || MI.getNumOperands() != 3)
        continue;

      // MachineInstr operands list the result first: (outs $rd), then
      // (ins $rs1, $rs2). This is the reverse of the assembly syntax
      // "fmuls %rs1,%rs2,%rd".
      const int DestReg = MI.getOperand(0).getReg();
      const int Src1Reg = MI.getOperand(1).getReg();
      const int Src2Reg = MI.getOperand(2).getReg();

      clearUsedRegisterList();
      // The destination is about to be written, so it must not be handed out
      // as a scratch register.
      markRegisterUsed(DestReg);

      // Only record a scratch register as used once it is known to be valid.
      const int Scratch1Reg = getUnusedFPRegister(MF.getRegInfo());
      int Scratch2Reg = UNASSIGNED_INDEX;
      if (Scratch1Reg != UNASSIGNED_INDEX) {
        markRegisterUsed(Scratch1Reg);
        Scratch2Reg = getUnusedFPRegister(MF.getRegInfo());
      }

      if (Scratch2Reg == UNASSIGNED_INDEX) {
        errs() << "Cannot allocate free scratch registers for the "
                  "ReplaceFMULS pass."
               << "\n";
        continue;
      }

      // NOTE(review): FMULS writes a single-precision register, whereas the
      // FMULD emitted below produces a double-precision result in DestReg.
      // Confirm whether a narrowing conversion back to single precision is
      // required, and whether the scratch registers (taken from the
      // SP::F0..SP::F31 range) are valid double-precision registers.

      // fstod %rs1, %scratch1
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), Scratch1Reg).addReg(Src1Reg);

      // fstod %rs2, %scratch2
      BuildMI(MBB, CurI, DL, TII.get(SP::FSTOD), Scratch2Reg).addReg(Src2Reg);

      // fmuld %scratch1, %scratch2, %rd
      BuildMI(MBB, CurI, DL, TII.get(SP::FMULD), DestReg)
          .addReg(Scratch1Reg)
          .addReg(Scratch2Reg);

      MI.eraseFromParent();
      Modified = true;
    }
  }

  return Modified;
}


//*****************************************************************************
Expand Down
26 changes: 0 additions & 26 deletions llvm/lib/Target/Sparc/LeonPasses.h
Expand Up @@ -57,32 +57,6 @@ class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
}
};

// LEON machine-function pass for the FSMULD erratum; getPassName() labels it
// "Erratum Fix LBR31".
class LLVM_LIBRARY_VISIBILITY FixFSMULD : public LEONMachineFunctionPass {
public:
  FixFSMULD();

  // Pass identifier handed to the LEONMachineFunctionPass constructor.
  static char ID;

  // Human-readable name of this pass.
  StringRef getPassName() const override {
    return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
  }

  // Entry point invoked per machine function; the return value reports
  // whether the function was modified.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

// LEON machine-function pass for the FMULS erratum; getPassName() labels it
// "Erratum Fix LBR32".
class LLVM_LIBRARY_VISIBILITY ReplaceFMULS : public LEONMachineFunctionPass {
public:
  ReplaceFMULS();

  // Pass identifier handed to the LEONMachineFunctionPass constructor.
  static char ID;

  // Human-readable name of this pass.
  StringRef getPassName() const override {
    return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a "
           "routine using conversions/double precision operations to replace "
           "FMULS";
  }

  // Entry point invoked per machine function; the return value reports
  // whether the function was modified.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

class LLVM_LIBRARY_VISIBILITY DetectRoundChange
: public LEONMachineFunctionPass {
public:
Expand Down
17 changes: 12 additions & 5 deletions llvm/lib/Target/Sparc/Sparc.td
Expand Up @@ -24,6 +24,13 @@ def FeatureSoftMulDiv
: SubtargetFeature<"soft-mul-div", "UseSoftMulDiv", "true",
"Use software emulation for integer multiply and divide">;

// "no-fsmuld": sets the HasNoFSMULD subtarget attribute to "true", marking the
// fsmuld instruction as unavailable.
def FeatureNoFSMULD
: SubtargetFeature<"no-fsmuld", "HasNoFSMULD", "true",
"Disable the fsmuld instruction.">;
// "no-fmuls": sets the HasNoFMULS subtarget attribute to "true", marking the
// fmuls instruction as unavailable.
def FeatureNoFMULS
: SubtargetFeature<"no-fmuls", "HasNoFMULS", "true",
"Disable the fmuls instruction.">;

def FeatureV9
: SubtargetFeature<"v9", "IsV9", "true",
"Enable SPARC-V9 instructions">;
Expand Down Expand Up @@ -51,9 +58,9 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true",
"Use the popc (population count) instruction">;

def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software emulation for floating point">;
"Use software emulation for floating point">;

//==== Features added predominantly for LEON subtarget support
//==== Features added predominantly for LEON subtarget support
include "LeonFeatures.td"

//===----------------------------------------------------------------------===//
Expand All @@ -79,7 +86,7 @@ class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;

def : Proc<"generic", []>;
def : Proc<"v7", [FeatureSoftMulDiv]>;
def : Proc<"v7", [FeatureSoftMulDiv, FeatureNoFSMULD]>;
def : Proc<"v8", []>;
def : Proc<"supersparc", []>;
def : Proc<"sparclite", []>;
Expand Down Expand Up @@ -128,8 +135,8 @@ def : Processor<"leon3", LEON3Itineraries,

// LEON 3 FT (UT699). Provides features for the UT699 processor
// - covers all the erratum fixes for LEON3, but does not support the CASA instruction.
def : Processor<"ut699", LEON3Itineraries,
[FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>;
def : Processor<"ut699", LEON3Itineraries,
[FeatureLeon, InsertNOPLoad, FeatureNoFSMULD, FeatureNoFMULS, FixAllFDIVSQRT]>;

// LEON3 FT (GR712RC). Provides features for the GR712RC processor.
// - covers all the erratum fixed for LEON3 and support for the CASA instruction.
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Expand Up @@ -1828,9 +1828,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSQRT, MVT::f32, Promote);
}

if (Subtarget->replaceFMULS()) {
// Promote FMULS to FMULD instructions instead as
// the former instructions generate errata on LEON processors.
if (Subtarget->hasNoFMULS()) {
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}

Expand Down
14 changes: 6 additions & 8 deletions llvm/lib/Target/Sparc/SparcInstrInfo.td
Expand Up @@ -61,8 +61,8 @@ def HasLeonCASA : Predicate<"Subtarget->hasLeonCasa()">;
def HasUMAC_SMAC : Predicate<"Subtarget->hasUmacSmac()">;

def HasNoFdivSqrtFix : Predicate<"!Subtarget->fixAllFDIVSQRT()">;
def HasNoFmulsFix : Predicate<"!Subtarget->replaceFMULS()">;
def HasNoFsmuldFix : Predicate<"!Subtarget->fixFSMULD()">;
def HasFMULS : Predicate<"!Subtarget->hasNoFMULS()">;
def HasFSMULD : Predicate<"!Subtarget->hasNoFSMULD()">;

// UseDeprecatedInsts - This predicate is true when the target processor is a
// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
Expand Down Expand Up @@ -1236,14 +1236,12 @@ def FSUBQ : F3_3<2, 0b110100, 0b001000111,


// Floating-point Multiply and Divide Instructions, p. 147
// FMULS generates an erratum on LEON processors, so by disabling this instruction
// this will be promoted to use FMULD with doubles instead.
let Predicates = [HasNoFmulsFix] in
def FMULS : F3_3<2, 0b110100, 0b001001001,
(outs FPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
"fmuls $rs1, $rs2, $rd",
[(set f32:$rd, (fmul f32:$rs1, f32:$rs2))],
IIC_fpu_muls>;
IIC_fpu_muls>,
Requires<[HasFMULS]>;
def FMULD : F3_3<2, 0b110100, 0b001001010,
(outs DFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
"fmuld $rs1, $rs2, $rd",
Expand All @@ -1255,13 +1253,13 @@ def FMULQ : F3_3<2, 0b110100, 0b001001011,
[(set f128:$rd, (fmul f128:$rs1, f128:$rs2))]>,
Requires<[HasHardQuad]>;

let Predicates = [HasNoFsmuldFix] in
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
"fsmuld $rs1, $rs2, $rd",
[(set f64:$rd, (fmul (fpextend f32:$rs1),
(fpextend f32:$rs2)))],
IIC_fpu_muld>;
IIC_fpu_muld>,
Requires<[HasFSMULD]>;
def FDMULQ : F3_3<2, 0b110100, 0b001101110,
(outs QFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
"fdmulq $rs1, $rs2, $rd",
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/Sparc/SparcSubtarget.cpp
Expand Up @@ -36,14 +36,14 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
HasHardQuad = false;
UsePopc = false;
UseSoftFloat = false;
HasNoFSMULD = false;
HasNoFMULS = false;

// Leon features
HasLeonCasa = false;
HasUmacSmac = false;
PerformSDIVReplace = false;
InsertNOPLoad = false;
FixFSMULD = false;
ReplaceFMULS = false;
FixAllFDIVSQRT = false;
DetectRoundChange = false;

Expand Down

0 comments on commit bb76d48

Please sign in to comment.