Merge branch 'master' of http://llvm.org/git/llvm into mico32

commit 867b0a9ddf7529ec2404d6546f0768cd410ba75c (2 parents: 2a1ffcf + cff6193)
authored by jpbonn
Showing with 1,071 additions and 875 deletions.
  1. +39 −0 docs/ProgrammersManual.html
  2. +4 −3 include/llvm/ADT/APInt.h
  3. +1 −1  include/llvm/ADT/PackedVector.h
  4. +10 −1 include/llvm/Analysis/MemoryDependenceAnalysis.h
  5. +4 −1 include/llvm/Attributes.h
  6. +6 −0 include/llvm/CodeGen/ScheduleDAG.h
  7. +12 −1 include/llvm/MC/MCContext.h
  8. +9 −40 include/llvm/Target/TargetRegisterInfo.h
  9. +0 −6 include/llvm/Transforms/Utils/Cloning.h
  10. +19 −10 lib/Analysis/MemDepPrinter.cpp
  11. +18 −22 lib/Analysis/MemoryDependenceAnalysis.cpp
  12. +5 −2 lib/AsmParser/LLLexer.cpp
  13. +1 −0  lib/AsmParser/LLParser.cpp
  14. +1 −0  lib/AsmParser/LLToken.h
  15. +2 −0  lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  16. +12 −0 lib/CodeGen/ScheduleDAG.cpp
  17. +7 −3 lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  18. +2 −1  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
  19. +1 −1  lib/CodeGen/SelectionDAG/LegalizeTypes.h
  20. +42 −14 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
  21. +1 −1  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
  22. +29 −4 lib/MC/MCParser/AsmParser.cpp
  23. +34 −3 lib/Support/APInt.cpp
  24. +110 −9 lib/Target/ARM/ARMISelLowering.cpp
  25. +4 −0 lib/Target/ARM/ARMInstrInfo.td
  26. +4 −0 lib/Target/ARM/ARMInstrThumb2.td
  27. +2 −2 lib/Target/CBackend/CBackend.cpp
  28. +6 −7 lib/Target/PowerPC/PPCISelLowering.cpp
  29. +56 −34 lib/Target/PowerPC/PPCJITInfo.cpp
  30. +11 −1 lib/Target/SystemZ/SystemZRegisterInfo.cpp
  31. +8 −259 lib/Target/SystemZ/SystemZRegisterInfo.td
  32. +2 −6 lib/Target/TargetRegisterInfo.cpp
  33. +20 −0 lib/Target/X86/X86ISelLowering.cpp
  34. +12 −13 lib/Transforms/Scalar/DeadStoreElimination.cpp
  35. +24 −8 lib/Transforms/Scalar/GVN.cpp
  36. +9 −11 lib/Transforms/Scalar/MemCpyOptimizer.cpp
  37. +0 −1  lib/Transforms/Utils/CMakeLists.txt
  38. +0 −128 lib/Transforms/Utils/CloneLoop.cpp
  39. +2 −26 lib/Transforms/Utils/Local.cpp
  40. +19 −1 lib/VMCore/AsmWriter.cpp
  41. +2 −0  lib/VMCore/Attributes.cpp
  42. +2 −7 runtime/libprofile/Makefile
  43. +13 −0 test/CodeGen/ARM/bfi.ll
  44. +33 −0 test/CodeGen/ARM/jumptable-label.ll
  45. +13 −0 test/CodeGen/ARM/rev.ll
  46. +14 −0 test/CodeGen/ARM/vpadd.ll
  47. +2 −2 test/CodeGen/SystemZ/11-BSwap.ll
  48. +19 −0 test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
  49. +17 −0 test/CodeGen/X86/4char-promote.ll
  50. +27 −0 test/CodeGen/X86/non-lazy-bind.ll
  51. +53 −0 test/CodeGen/X86/shl_undef.ll
  52. +5 −2 test/CodeGen/X86/tail-threshold.ll
  53. +2 −0  test/Feature/paramattrs.ll
  54. +15 −15 test/MC/X86/x86-64.s
  55. +115 −0 unittests/ADT/PackedVectorTest.cpp
  56. +1 −0  unittests/CMakeLists.txt
  57. +4 −4 utils/TableGen/AsmMatcherEmitter.cpp
  58. +26 −4 utils/TableGen/AsmWriterEmitter.cpp
  59. +2 −1  utils/TableGen/CodeGenInstruction.cpp
  60. +94 −9 utils/TableGen/CodeGenRegisters.cpp
  61. +39 −30 utils/TableGen/CodeGenRegisters.h
  62. +5 −16 utils/TableGen/CodeGenTarget.cpp
  63. +2 −64 utils/TableGen/CodeGenTarget.h
  64. +2 −2 utils/TableGen/DAGISelMatcherGen.cpp
  65. +1 −1  utils/TableGen/FastISelEmitter.cpp
  66. +4 −4 utils/TableGen/InstrInfoEmitter.cpp
  67. +9 −94 utils/TableGen/RegisterInfoEmitter.cpp
  68. +2 −0  utils/llvm.grm
39 docs/ProgrammersManual.html
@@ -64,6 +64,7 @@
<li><a href="#dss_deque">&lt;deque&gt;</a></li>
<li><a href="#dss_list">&lt;list&gt;</a></li>
<li><a href="#dss_ilist">llvm/ADT/ilist.h</a></li>
+ <li><a href="#dss_packedvector">llvm/ADT/PackedVector.h</a></li>
<li><a href="#dss_other">Other Sequential Container Options</a></li>
</ul></li>
<li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
@@ -1069,6 +1070,44 @@
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="dss_packedvector">llvm/ADT/PackedVector.h</a>
+</h4>
+
+<div>
+<p>
+Useful for storing a vector of values using only a small number of bits for each
+value. Apart from the standard operations of a vector-like container, it can
+also perform an 'or' set operation.
+</p>
+
+<p>For example:</p>
+
+<div class="doc_code">
+<pre>
+enum State {
+ None = 0x0,
+ FirstCondition = 0x1,
+ SecondCondition = 0x2,
+ Both = 0x3
+};
+
+State get() {
+ PackedVector&lt;State, 2&gt; Vec1;
+ Vec1.push_back(FirstCondition);
+
+ PackedVector&lt;State, 2&gt; Vec2;
+ Vec2.push_back(SecondCondition);
+
+ Vec1 |= Vec2;
+ return Vec1[0]; // returns 'Both'.
+}
+</pre>
+</div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="dss_ilist_traits">ilist_traits</a>
</h4>
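
In the documentation added above, the second template argument to PackedVector is the number of bits stored per element. A minimal standalone version of the same example, assuming llvm/ADT/PackedVector.h from this tree (illustrative only, not part of this commit):

#include "llvm/ADT/PackedVector.h"

enum State {
  None            = 0x0,
  FirstCondition  = 0x1,
  SecondCondition = 0x2,
  Both            = 0x3
};

State get() {
  llvm::PackedVector<State, 2> Vec1;   // 2 bits per element
  Vec1.push_back(FirstCondition);

  llvm::PackedVector<State, 2> Vec2;
  Vec2.push_back(SecondCondition);

  Vec1 |= Vec2;                        // element-wise 'or'
  return Vec1[0];                      // returns 'Both'
}
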
7 include/llvm/ADT/APInt.h
@@ -1241,18 +1241,19 @@ class APInt {
/// toString - Converts an APInt to a string and append it to Str. Str is
/// commonly a SmallString.
- void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed) const;
+ void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
+ bool formatAsCLiteral = false) const;
/// Considers the APInt to be unsigned and converts it into a string in the
/// radix given. The radix can be 2, 8, 10 or 16.
void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
- toString(Str, Radix, false);
+ toString(Str, Radix, false, false);
}
/// Considers the APInt to be signed and converts it into a string in the
/// radix given. The radix can be 2, 8, 10 or 16.
void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
- toString(Str, Radix, true);
+ toString(Str, Radix, true, false);
}
/// toString - This returns the APInt as a std::string. Note that this is an
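
A minimal sketch of the new formatAsCLiteral flag in use, assuming the updated APInt.h above (illustrative only, not part of this commit; the prefix logic itself lives in lib/Support/APInt.cpp below):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

void printAsHexLiteral(const llvm::APInt &V) {
  llvm::SmallString<40> S;
  // Radix 16, unsigned; formatAsCLiteral=true emits a leading "0x".
  V.toString(S, 16, /*Signed=*/false, /*formatAsCLiteral=*/true);
  llvm::outs() << S.str() << "\n";
}
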
2  include/llvm/ADT/PackedVector.h
@@ -90,7 +90,7 @@ class PackedVector : public PackedVectorBase<T, BitNum,
Vec.setValue(Vec.Bits, Idx, val);
return *this;
}
- operator T() {
+ operator T() const {
return Vec.getValue(Vec.Bits, Idx);
}
};
11 include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -90,18 +90,27 @@ namespace llvm {
/// get methods: These are static ctor methods for creating various
/// MemDepResult kinds.
static MemDepResult getDef(Instruction *Inst) {
+ assert(Inst && "Def requires inst");
return MemDepResult(PairTy(Inst, Def));
}
static MemDepResult getClobber(Instruction *Inst) {
+ assert(Inst && "Clobber requires inst");
return MemDepResult(PairTy(Inst, Clobber));
}
static MemDepResult getNonLocal() {
return MemDepResult(PairTy(0, NonLocal));
}
+ static MemDepResult getUnknown() {
+ return MemDepResult(PairTy(0, Clobber));
+ }
/// isClobber - Return true if this MemDepResult represents a query that is
/// a instruction clobber dependency.
- bool isClobber() const { return Value.getInt() == Clobber; }
+ bool isClobber() const { return Value.getInt() == Clobber && getInst(); }
+
+ /// isUnknown - Return true if this MemDepResult represents a query which
+ /// cannot and/or will not be computed.
+ bool isUnknown() const { return Value.getInt() == Clobber && !getInst(); }
/// isDef - Return true if this MemDepResult represents a query that is
/// a instruction definition dependency.
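
A minimal sketch of how a client distinguishes the new result kind, assuming the header above (illustrative only, not part of this commit; the encoding reuses the Clobber tag with a null instruction):

#include "llvm/Analysis/MemoryDependenceAnalysis.h"

void classify(const llvm::MemDepResult &Res) {
  if (Res.isUnknown()) {
    // The query could not (or will not) be computed; treat conservatively.
  } else if (Res.isClobber()) {
    llvm::Instruction *ClobberInst = Res.getInst();  // non-null by construction
    (void)ClobberInst;
  } else if (Res.isDef()) {
    llvm::Instruction *DefInst = Res.getInst();
    (void)DefInst;
  }
}
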
5 include/llvm/Attributes.h
@@ -69,6 +69,9 @@ const Attributes Hotpatch = 1<<29; ///< Function should have special
///'hotpatch' sequence in prologue
const Attributes UWTable = 1<<30; ///< Function must be in a unwind
///table
+const Attributes NonLazyBind = 1U<<31; ///< Function is called early and/or
+ /// often, so lazy binding isn't
+ /// worthwhile.
/// Note that uwtable is about the ABI or the user mandating an entry in the
/// unwind table. The nounwind attribute is about an exception passing by the
@@ -90,7 +93,7 @@ const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment |
- Hotpatch | UWTable;
+ Hotpatch | UWTable | NonLazyBind;
/// @brief Parameter attributes that do not apply to vararg call arguments.
const Attributes VarArgsIncompatible = StructRet;
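
A minimal sketch of setting and testing the new attribute from C++, assuming the header above (illustrative only, not part of this commit; the X86 lowering change below queries it via hasFnAttr):

#include "llvm/Function.h"
#include "llvm/Attributes.h"

void markNonLazyBind(llvm::Function &F) {
  F.addFnAttr(llvm::Attribute::NonLazyBind);   // suppress lazy binding for F
}

bool isNonLazyBind(const llvm::Function &F) {
  return F.hasFnAttr(llvm::Attribute::NonLazyBind);
}
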
6 include/llvm/CodeGen/ScheduleDAG.h
@@ -497,6 +497,12 @@ namespace llvm {
SUnit EntrySU; // Special node for the region entry.
SUnit ExitSU; // Special node for the region exit.
+#ifdef NDEBUG
+ static const bool StressSched = false;
+#else
+ bool StressSched;
+#endif
+
explicit ScheduleDAG(MachineFunction &mf);
virtual ~ScheduleDAG();
13 include/llvm/MC/MCContext.h
@@ -39,6 +39,9 @@ namespace llvm {
class MCContext {
MCContext(const MCContext&); // DO NOT IMPLEMENT
MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
+ public:
+ typedef StringMap<MCSymbol*, BumpPtrAllocator&> SymbolTable;
+ private:
/// The MCAsmInfo for this target.
const MCAsmInfo &MAI;
@@ -52,7 +55,7 @@ namespace llvm {
BumpPtrAllocator Allocator;
/// Symbols - Bindings of names to symbols.
- StringMap<MCSymbol*, BumpPtrAllocator&> Symbols;
+ SymbolTable Symbols;
/// UsedNames - Keeps tracks of names that were used both for used declared
/// and artificial symbols.
@@ -142,6 +145,14 @@ namespace llvm {
/// LookupSymbol - Get the symbol for \p Name, or null.
MCSymbol *LookupSymbol(StringRef Name) const;
+ /// getSymbols - Get a reference for the symbol table for clients that
+ /// want to, for example, iterate over all symbols. 'const' because we
+ /// still want any modifications to the table itself to use the MCContext
+ /// APIs.
+ const SymbolTable &getSymbols() const {
+ return Symbols;
+ }
+
/// @}
/// @name Section Management
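
A minimal sketch of iterating the symbol table through the new accessor, assuming an MCContext reference Ctx (illustrative only, not part of this commit; the AsmParser change below is the in-tree consumer):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"

void visitSymbols(const llvm::MCContext &Ctx) {
  const llvm::MCContext::SymbolTable &Symbols = Ctx.getSymbols();
  for (llvm::MCContext::SymbolTable::const_iterator I = Symbols.begin(),
                                                    E = Symbols.end();
       I != E; ++I) {
    llvm::MCSymbol *Sym = I->getValue();   // StringMap entry -> MCSymbol*
    (void)Sym;
  }
}
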
49 include/llvm/Target/TargetRegisterInfo.h
@@ -285,11 +285,6 @@ class TargetRegisterClass {
/// descriptor.
///
class TargetRegisterInfo {
-protected:
- const unsigned* SubregHash;
- const unsigned SubregHashSize;
- const unsigned* AliasesHash;
- const unsigned AliasesHashSize;
public:
typedef const TargetRegisterClass * const * regclass_iterator;
private:
@@ -307,11 +302,7 @@ class TargetRegisterInfo {
regclass_iterator RegClassEnd,
const char *const *subregindexnames,
int CallFrameSetupOpcode = -1,
- int CallFrameDestroyOpcode = -1,
- const unsigned* subregs = 0,
- const unsigned subregsize = 0,
- const unsigned* aliases = 0,
- const unsigned aliasessize = 0);
+ int CallFrameDestroyOpcode = -1);
virtual ~TargetRegisterInfo();
public:
@@ -468,50 +459,28 @@ class TargetRegisterInfo {
/// regsOverlap - Returns true if the two registers are equal or alias each
/// other. The registers may be virtual register.
bool regsOverlap(unsigned regA, unsigned regB) const {
- if (regA == regB)
- return true;
-
- if (regA > regB)
- std::swap(regA, regB);
-
+ if (regA == regB) return true;
if (isVirtualRegister(regA) || isVirtualRegister(regB))
return false;
-
- // regA and regB are distinct physical registers. Do they alias?
- size_t index = (regA * 11 + regB * 97) & (AliasesHashSize-1);
- unsigned ProbeAmt = 1;
- while (AliasesHash[index*2] != 0 && AliasesHash[index*2+1] != 0) {
- if (AliasesHash[index*2] == regA && AliasesHash[index*2+1] == regB)
- return true;
-
- index = (index + ProbeAmt) & (AliasesHashSize-1);
- ProbeAmt += 1;
+ for (const unsigned *regList = getOverlaps(regA)+1; *regList; ++regList) {
+ if (*regList == regB) return true;
}
-
return false;
}
/// isSubRegister - Returns true if regB is a sub-register of regA.
///
bool isSubRegister(unsigned regA, unsigned regB) const {
- // SubregHash is a simple quadratically probed hash table.
- size_t index = (regA * 11 + regB * 97) & (SubregHashSize-1);
- unsigned ProbeAmt = 1;
- while (SubregHash[index*2] != 0 && SubregHash[index*2+1] != 0) {
- if (SubregHash[index*2] == regA && SubregHash[index*2+1] == regB)
- return true;
-
- index = (index + ProbeAmt) & (SubregHashSize-1);
- ProbeAmt += 1;
- }
-
- return false;
+ return isSuperRegister(regB, regA);
}
/// isSuperRegister - Returns true if regB is a super-register of regA.
///
bool isSuperRegister(unsigned regA, unsigned regB) const {
- return isSubRegister(regB, regA);
+ for (const unsigned *regList = getSuperRegisters(regA); *regList;++regList){
+ if (*regList == regB) return true;
+ }
+ return false;
}
/// getCalleeSavedRegs - Return a null-terminated list of all of the
6 include/llvm/Transforms/Utils/Cloning.h
@@ -107,12 +107,6 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB,
const Twine &NameSuffix = "", Function *F = 0,
ClonedCodeInfo *CodeInfo = 0);
-
-/// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate
-/// VMap using old blocks to new blocks mapping.
-Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
- ValueToValueMapTy &VMap, Pass *P);
-
/// CloneFunction - Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
29 lib/Analysis/MemDepPrinter.cpp
@@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
MemDepResult Res = MDA.getDependency(Inst);
if (!Res.isNonLocal()) {
- assert(Res.isClobber() != Res.isDef() &&
- "Local dep should be def or clobber!");
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Local dep should be unknown, def or clobber!");
Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
Res.isClobber()),
static_cast<BasicBlock *>(0)));
@@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
const MemDepResult &Res = I->getResult();
- assert(Res.isClobber() != Res.isDef() &&
- "Resolved non-local call dep should be def or clobber!");
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Resolved non-local call dep should be unknown, def or "
+ "clobber!");
InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
Res.isClobber()),
I->getBB()));
@@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
bool isClobber = I->first.getInt();
const BasicBlock *DepBB = I->second;
- OS << " " << (isClobber ? "Clobber" : " Def");
+ OS << " ";
+ if (!DepInst)
+ OS << "Unknown";
+ else if (isClobber)
+ OS << "Clobber";
+ else
+ OS << " Def";
if (DepBB) {
OS << " in block ";
WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
}
- OS << " from: ";
- if (DepInst == Inst)
- OS << "<unspecified>";
- else
- DepInst->print(OS);
+ if (DepInst) {
+ OS << " from: ";
+ if (DepInst == Inst)
+ OS << "<unspecified>";
+ else
+ DepInst->print(OS);
+ }
OS << "\n";
}
40 lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -215,11 +215,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getUnknown();
}
/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -458,11 +458,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getUnknown();
}
/// getDependency - Return the instruction on which a memory operation
@@ -490,12 +490,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
else
- LocalCache = MemDepResult::getClobber(QueryInst);
+ LocalCache = MemDepResult::getUnknown();
} else {
AliasAnalysis::Location MemLoc;
AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -514,7 +514,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
QueryParent);
} else
// Non-memory instruction.
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ LocalCache = MemDepResult::getUnknown();
}
// Remember the result!
@@ -648,10 +648,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
- // a clobber, otherwise it is non-local.
+ // a clobber, otherwise it is unknown.
Dep = MemDepResult::getNonLocal();
} else {
- Dep = MemDepResult::getClobber(ScanPos);
+ Dep = MemDepResult::getUnknown();
}
// If we had a dirty entry for the block, update it. Otherwise, just add
@@ -707,7 +707,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB,
- MemDepResult::getClobber(FromBB->begin()),
+ MemDepResult::getUnknown(),
const_cast<Value *>(Loc.Ptr)));
}
@@ -769,7 +769,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
- if (Dep.isNonLocal())
+ if (Dep.isNonLocal() || Dep.isUnknown())
return Dep;
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
@@ -1091,16 +1091,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// If getNonLocalPointerDepFromBB fails here, that means the cached
// result conflicted with the Visited list; we have to conservatively
- // assume a clobber, but this also does not block PRE of the load.
+ // assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
getNonLocalPointerDepFromBB(PredPointer,
Loc.getWithNewPtr(PredPtrVal),
isLoad, Pred,
Result, Visited)) {
// Add the entry to the Result list.
- NonLocalDepResult Entry(Pred,
- MemDepResult::getClobber(Pred->getTerminator()),
- PredPtrVal);
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
Result.push_back(Entry);
// Since we had a phi translation failure, the cache for CacheKey won't
@@ -1145,8 +1143,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
- // If *nothing* works, mark the pointer as being clobbered by the first
- // instruction in this block.
+ // If *nothing* works, mark the pointer as unknown.
//
// If this is the magic first block, return this as a clobber of the whole
// incoming value. Since we can't phi translate to one of the predecessors,
@@ -1161,8 +1158,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
- I->setResult(MemDepResult::getClobber(BB->getTerminator()));
- ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
+ I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
Pointer.getAddr()));
break;
7 lib/AsmParser/LLLexer.cpp
@@ -422,13 +422,15 @@ static bool JustWhitespaceNewLine(const char *&Ptr) {
/// !
lltok::Kind LLLexer::LexExclaim() {
// Lex a metadata name as a MetadataVar.
- if (isalpha(CurPtr[0])) {
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
++CurPtr;
while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
- CurPtr[0] == '.' || CurPtr[0] == '_')
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
++CurPtr;
StrVal.assign(TokStart+1, CurPtr); // Skip !
+ UnEscapeLexed(StrVal);
return lltok::MetadataVar;
}
return lltok::exclaim;
@@ -570,6 +572,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noimplicitfloat);
KEYWORD(naked);
KEYWORD(hotpatch);
+ KEYWORD(nonlazybind);
KEYWORD(type);
KEYWORD(opaque);
1  lib/AsmParser/LLParser.cpp
@@ -985,6 +985,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break;
+ case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
case lltok::kw_alignstack: {
unsigned Alignment;
1  lib/AsmParser/LLToken.h
@@ -99,6 +99,7 @@ namespace lltok {
kw_noimplicitfloat,
kw_naked,
kw_hotpatch,
+ kw_nonlazybind,
kw_type,
kw_opaque,
2  lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1948,6 +1948,8 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
for (MachineInstr::mop_iterator OI = MI.operands_begin(),
OE = MI.operands_end(); OI != OE; ++OI) {
const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
if (OP.isMBB() && OP.getMBB() == MBB)
return false;
}
12 lib/CodeGen/ScheduleDAG.cpp
@@ -19,17 +19,27 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
+#ifndef NDEBUG
+cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
TRI(TM.getRegisterInfo()),
MF(mf), MRI(mf.getRegInfo()),
EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
}
ScheduleDAG::~ScheduleDAG() {}
@@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << G->TRI->getName(I->getReg());
dbgs() << "\n";
}
}
10 lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3030,6 +3030,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -6425,14 +6428,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// FIXME: is there such a thing as a truncating indexed store?
if (ST->isTruncatingStore() && ST->isUnindexed() &&
- Value.getValueType().isInteger() && !Value.getValueType().isVector()) {
+ Value.getValueType().isInteger()) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
SDValue Shorter =
GetDemandedBits(Value,
- APInt::getLowBitsSet(Value.getValueSizeInBits(),
- ST->getMemoryVT().getSizeInBits()));
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
3  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -972,7 +972,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
}
2  lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -201,7 +201,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
Op = GetPromotedInteger(Op);
- return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
// Integer Result Promotion.
56 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1369,6 +1369,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
@@ -1569,20 +1584,33 @@ class RegReductionPQBase : public SchedulingPriorityQueue {
};
template<class SF>
-class RegReductionPriorityQueue : public RegReductionPQBase {
- static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
- std::vector<SUnit *>::iterator Best = Q.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
- E = Q.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Q.end()))
- std::swap(*Best, Q.back());
- Q.pop_back();
- return V;
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
}
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
SF Picker;
public:
@@ -1603,7 +1631,7 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
SUnit *pop() {
if (Queue.empty()) return NULL;
- SUnit *V = popFromQueue(Queue, Picker);
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
V->NodeQueueId = 0;
return V;
}
@@ -1613,7 +1641,7 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
- SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
if (isBottomUp())
dbgs() << "Height " << SU->getHeight() << ": ";
else
2  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -435,7 +435,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// it requires a cross class copy (cost < 0). That means we are only
// treating "expensive to copy" register dependency as physical register
// dependency. This may change in the future though.
- if (Cost >= 0)
+ if (Cost >= 0 && !StressSched)
PhysReg = 0;
// If this is a ctrl dep, latency is 1.
33 lib/MC/MCParser/AsmParser.cpp
@@ -84,6 +84,7 @@ class AsmParser : public MCAsmParser {
AsmLexer Lexer;
MCContext &Ctx;
MCStreamer &Out;
+ const MCAsmInfo &MAI;
SourceMgr &SrcMgr;
MCAsmParserExtension *GenericParser;
MCAsmParserExtension *PlatformParser;
@@ -135,7 +136,7 @@ class AsmParser : public MCAsmParser {
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
- virtual bool Warning(SMLoc L, const Twine &Meg);
+ virtual bool Warning(SMLoc L, const Twine &Msg);
virtual bool Error(SMLoc L, const Twine &Msg);
const AsmToken &Lex();
@@ -160,8 +161,9 @@ class AsmParser : public MCAsmParser {
void HandleMacroExit();
void PrintMacroInstantiations();
- void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
- SrcMgr.PrintMessage(Loc, Msg, Type);
+ void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+ bool ShowLine = true) const {
+ SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
}
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
@@ -337,7 +339,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
- : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
CurBuffer(0), MacrosEnabled(true) {
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
@@ -466,6 +468,29 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
TokError("unassigned file number: " + Twine(i) + " for .file directives");
}
+ // Check to see that all assembler local symbols were actually defined.
+ // Targets that don't do subsections via symbols may not want this, though,
+ // so conservatively exclude them. Only do this if we're finalizing, though,
+ // as otherwise we won't necessarily have seen everything yet.
+ if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+ const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+ for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+ e = Symbols.end();
+ i != e; ++i) {
+ MCSymbol *Sym = i->getValue();
+ // Variable symbols may not be marked as defined, so check those
+ // explicitly. If we know it's a variable, we have a definition for
+ // the purposes of this check.
+ if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+ // FIXME: We would really like to refer back to where the symbol was
+ // first referenced for a source location. We need to add something
+ // to track that. Currently, we just point to the end of the file.
+ PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined", "error", false);
+ }
+ }
+
+
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
37 lib/Support/APInt.cpp
@@ -2164,12 +2164,33 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
}
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
- bool Signed) const {
+ bool Signed, bool formatAsCLiteral) const {
assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
"Radix should be 2, 8, 10, or 16!");
+ const char *Prefix = "";
+ if (formatAsCLiteral) {
+ switch (Radix) {
+ case 2:
+ // Binary literals are a non-standard extension added in gcc 4.3:
+ // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+ Prefix = "0b";
+ break;
+ case 8:
+ Prefix = "0";
+ break;
+ case 16:
+ Prefix = "0x";
+ break;
+ }
+ }
+
// First, check for a zero value and just short circuit the logic below.
if (*this == 0) {
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
Str.push_back('0');
return;
}
@@ -2193,6 +2214,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
}
}
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+
while (N) {
*--BufPtr = Digits[N % Radix];
N /= Radix;
@@ -2212,6 +2238,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
Str.push_back('-');
}
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+ };
+
// We insert the digits backward, then reverse them to get the right order.
unsigned StartDig = Str.size();
@@ -2251,7 +2282,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
/// to the methods above.
std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
SmallString<40> S;
- toString(S, Radix, Signed);
+ toString(S, Radix, Signed, /* formatAsCLiteral = */false);
return S.str();
}
@@ -2266,7 +2297,7 @@ void APInt::dump() const {
void APInt::print(raw_ostream &OS, bool isSigned) const {
SmallString<40> S;
- this->toString(S, 10, isSigned);
+ this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
OS << S.str();
}
119 lib/Target/ARM/ARMISelLowering.cpp
@@ -5523,12 +5523,109 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Only perform optimization if after legalize, and if NEON is available. We
+ // also expect both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+ // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
+ // operands, where N is the size of the formed vector.
+ // Each EXTRACT_VECTOR should have the same input vector and odd or even
+ // index such that we have a pair wise add pattern.
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+ // For each operands to the ADD which are BUILD_VECTORs,
+ // check to see if each of their operands are an EXTRACT_VECTOR with
+ // the same vector and appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+ // First operand is the vector, verify it's the same.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+ // Second is the constant, verify it's correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+ // For the constant, we want to see all the even or all the odd.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ DebugLoc DL = N->getDebugLoc();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ assert(0 && "Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
@@ -5540,17 +5637,18 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
- SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
if (Result.getNode())
return Result;
// If that didn't work, try again with the operands commuted.
- return PerformADDCombineWithOperands(N, N1, N0, DCI);
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
@@ -5875,8 +5973,8 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
}
-/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
-/// C1 & C2 == C1.
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
@@ -5884,9 +5982,12 @@ static SDValue PerformBFICombine(SDNode *N,
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
- unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
unsigned Mask2 = N11C->getZExtValue();
- if ((Mask & Mask2) == Mask2)
+ if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
@@ -6755,7 +6856,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
4 lib/Target/ARM/ARMInstrInfo.td
@@ -3029,6 +3029,10 @@ def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
(shl GPR:$Rm, (i32 8))), i16),
(REVSH GPR:$Rm)>;
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
+ (REVSH GPR:$Rm)>;
+
// Need the AddedComplexity or else MOVs + REV would be chosen.
let AddedComplexity = 5 in
def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
4 lib/Target/ARM/ARMInstrThumb2.td
@@ -2604,6 +2604,10 @@ def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
(shl rGPR:$Rm, (i32 8))), i16),
(t2REVSH rGPR:$Rm)>;
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
+ (t2REVSH rGPR:$Rm)>;
+
def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
def t2PKHBT : T2ThreeReg<
4 lib/Target/CBackend/CBackend.cpp
@@ -278,7 +278,7 @@ namespace {
return AI;
}
- // isInlineAsm - Check if the instruction is a call to an inline asm chunk
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
static bool isInlineAsm(const Instruction& I) {
if (const CallInst *CI = dyn_cast<CallInst>(&I))
return isa<InlineAsm>(CI->getCalledValue());
@@ -660,7 +660,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
if (isString) {
Out << '\"';
- // Keep track of whether the last number was a hexadecimal escape
+ // Keep track of whether the last number was a hexadecimal escape.
bool LastWasHex = false;
// Do not include the last character, which we know is null
13 lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2902,6 +2902,12 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
+ // Set CR6 to true if this is a vararg call.
+ if (isVarArg) {
+ SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+ RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+ }
+
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@@ -2911,13 +2917,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
- // Set CR6 to true if this is a vararg call.
- if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
- Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
- InFlag = Chain.getValue(1);
- }
-
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
90 lib/Target/PowerPC/PPCJITInfo.cpp
@@ -87,7 +87,7 @@ asm(
// FIXME: could shrink frame
// Set up a proper stack frame
// FIXME Layout
- // PowerPC64 ABI linkage - 24 bytes
+ // PowerPC32 ABI linkage - 24 bytes
// parameters - 32 bytes
// 13 double registers - 104 bytes
// 8 int registers - 32 bytes
@@ -205,11 +205,27 @@ void PPC32CompilationCallback() {
#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
defined(__ppc64__)
+#ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
asm(
".text\n"
".align 2\n"
".globl _PPC64CompilationCallback\n"
"_PPC64CompilationCallback:\n"
+#endif
// Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
// FIXME: need to save v[0-19] for altivec?
// Set up a proper stack frame
@@ -218,49 +234,55 @@ asm(
// parameters - 64 bytes
// 13 double registers - 104 bytes
// 8 int registers - 64 bytes
- "mflr r0\n"
- "std r0, 16(r1)\n"
- "stdu r1, -280(r1)\n"
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
// Save all int arg registers
- "std r10, 272(r1)\n" "std r9, 264(r1)\n"
- "std r8, 256(r1)\n" "std r7, 248(r1)\n"
- "std r6, 240(r1)\n" "std r5, 232(r1)\n"
- "std r4, 224(r1)\n" "std r3, 216(r1)\n"
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
// Save all call-clobbered FP regs.
- "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n"
- "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n"
- "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n"
- "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n"
- "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n"
- "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n"
- "stfd f1, 112(r1)\n"
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
// Arguments to Compilation Callback:
// r3 - our lr (address of the call instruction in stub plus 4)
// r4 - stub's lr (address of instruction that called the stub plus 4)
// r5 - is64Bit - always 1.
- "mr r3, r0\n"
- "ld r2, 280(r1)\n" // stub's frame
- "ld r4, 16(r2)\n" // stub's lr
- "li r5, 1\n" // 1 == 64 bit
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+#ifdef __ELF__
+ "bl PPCCompilationCallbackC\n"
+ "nop\n"
+#else
"bl _PPCCompilationCallbackC\n"
- "mtctr r3\n"
+#endif
+ "mtctr 3\n"
// Restore all int arg registers
- "ld r10, 272(r1)\n" "ld r9, 264(r1)\n"
- "ld r8, 256(r1)\n" "ld r7, 248(r1)\n"
- "ld r6, 240(r1)\n" "ld r5, 232(r1)\n"
- "ld r4, 224(r1)\n" "ld r3, 216(r1)\n"
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
// Restore all FP arg registers
- "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n"
- "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n"
- "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n"
- "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n"
- "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n"
- "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n"
- "lfd f1, 112(r1)\n"
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
// Pop 3 frames off the stack and branch to target
- "ld r1, 280(r1)\n"
- "ld r2, 16(r1)\n"
- "mtlr r2\n"
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
"bctr\n"
);
#else
12 lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -51,10 +51,20 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const
BitVector Reserved(getNumRegs());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (TFI->hasFP(MF))
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R10P);
+ Reserved.set(SystemZ::R10Q);
+ }
+
Reserved.set(SystemZ::R14D);
Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R14W);
+ Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R14P);
+ Reserved.set(SystemZ::R14Q);
return Reserved;
}
267 lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -168,45 +168,7 @@ def GR32 : RegisterClass<"SystemZ", [i32], 32,
// Frame pointer, sometimes allocable
R11W,
// Volatile, but not allocable
- R14W, R15W]>
-{
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG32[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, SystemZ::R11W,
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- static const unsigned SystemZ_REG32_nofp[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, /* No R11W */
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- GR32Class::iterator
- GR32Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG32_nofp;
- else
- return SystemZ_REG32;
- }
- GR32Class::iterator
- GR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned));
- }
- }];
-}
+ R14W, R15W]>;
/// Registers used to generate address. Everything except R0.
def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
@@ -215,45 +177,7 @@ def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
// Frame pointer, sometimes allocable
R11W,
// Volatile, but not allocable
- R14W, R15W]>
-{
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_ADDR32[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, /* No R0W */ SystemZ::R12W, SystemZ::R11W,
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- static const unsigned SystemZ_ADDR32_nofp[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, /* No R0W */ SystemZ::R12W, /* No R11W */
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- ADDR32Class::iterator
- ADDR32Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR32_nofp;
- else
- return SystemZ_ADDR32;
- }
- ADDR32Class::iterator
- ADDR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned));
- else
- return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned));
- }
- }];
-}
+ R14W, R15W]>;
def GR64 : RegisterClass<"SystemZ", [i64], 64,
// Volatile registers
@@ -261,45 +185,8 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
// Frame pointer, sometimes allocable
R11D,
// Volatile, but not allocable
- R14D, R15D]>
-{
+ R14D, R15D]> {
let SubRegClasses = [(GR32 subreg_32bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG64[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, SystemZ::R11D,
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- static const unsigned SystemZ_REG64_nofp[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, /* No R11D */
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- GR64Class::iterator
- GR64Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64_nofp;
- else
- return SystemZ_REG64;
- }
- GR64Class::iterator
- GR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned));
- }
- }];
}
def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
@@ -308,168 +195,30 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
// Frame pointer, sometimes allocable
R11D,
// Volatile, but not allocable
- R14D, R15D]>
-{
+ R14D, R15D]> {
let SubRegClasses = [(ADDR32 subreg_32bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_ADDR64[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, /* No R0D */ SystemZ::R12D, SystemZ::R11D,
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- static const unsigned SystemZ_ADDR64_nofp[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, /* No R0D */ SystemZ::R12D, /* No R11D */
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- ADDR64Class::iterator
- ADDR64Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR64_nofp;
- else
- return SystemZ_ADDR64;
- }
- ADDR64Class::iterator
- ADDR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned));
- else
- return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned));
- }
- }];
}
// Even-odd register pairs
def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
- [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
-{
+ [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> {
let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG64P[] = {
- SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, SystemZ::R10P,
- SystemZ::R8P, SystemZ::R6P };
- static const unsigned SystemZ_REG64P_nofp[] = {
- SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, /* NO R10P */
- SystemZ::R8P, SystemZ::R6P };
- GR64PClass::iterator
- GR64PClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64P_nofp;
- else
- return SystemZ_REG64P;
- }
- GR64PClass::iterator
- GR64PClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned));
- }
- }];
}
def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
- [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
-{
+ [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> {
let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
(GR64 subreg_even, subreg_odd)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG128[] = {
- SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, SystemZ::R10Q,
- SystemZ::R8Q, SystemZ::R6Q };
- static const unsigned SystemZ_REG128_nofp[] = {
- SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, /* NO R10Q */
- SystemZ::R8Q, SystemZ::R6Q };
- GR128Class::iterator
- GR128Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG128_nofp;
- else
- return SystemZ_REG128;
- }
- GR128Class::iterator
- GR128Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned));
- }
- }];
}
def FP32 : RegisterClass<"SystemZ", [f32], 32,
[F0S, F1S, F2S, F3S, F4S, F5S, F6S, F7S,
- F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REGFP32[] = {
- SystemZ::F0S, SystemZ::F2S, SystemZ::F4S, SystemZ::F6S,
- SystemZ::F1S, SystemZ::F3S, SystemZ::F5S, SystemZ::F7S,
- SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
- SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S };
- FP32Class::iterator
- FP32Class::allocation_order_begin(const MachineFunction &MF) const {
- return SystemZ_REGFP32;
- }
- FP32Class::iterator
- FP32Class::allocation_order_end(const MachineFunction &MF) const {
- return SystemZ_REGFP32 + (sizeof(SystemZ_REGFP32) / sizeof(unsigned));
- }
- }];
-}
+ F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]>;
def FP64 : RegisterClass<"SystemZ", [f64], 64,
- [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L,
+ [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L,
F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
let SubRegClasses = [(FP32 subreg_32bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REGFP64[] = {
- SystemZ::F0L, SystemZ::F2L, SystemZ::F4L, SystemZ::F6L,
- SystemZ::F1L, SystemZ::F3L, SystemZ::F5L, SystemZ::F7L,
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L };
- FP64Class::iterator
- FP64Class::allocation_order_begin(const MachineFunction &MF) const {
- return SystemZ_REGFP64;
- }
- FP64Class::iterator
- FP64Class::allocation_order_end(const MachineFunction &MF) const {
- return SystemZ_REGFP64 + (sizeof(SystemZ_REGFP64) / sizeof(unsigned));
- }
- }];
}
// Status flags registers.
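All of the deleted MethodBodies above keyed the allocation order off the same frame-pointer test, which is ordinary C++ and does not need to live in TableGen-generated code. A minimal sketch of that test, using only the same-era API already visible in the removed lines (the helper name and header choices are assumptions):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetFrameLowering.h"
using namespace llvm;

// True when the function keeps a frame pointer; the deleted tables switched
// to their /* NO R10P */ and /* NO R10Q */ variants in that case.
static bool usesFramePointer(const MachineFunction &MF) {
  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
  return TFI->hasFP(MF);
}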
8 lib/Target/TargetRegisterInfo.cpp
@@ -23,12 +23,8 @@ using namespace llvm;
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
regclass_iterator RCB, regclass_iterator RCE,
const char *const *subregindexnames,
- int CFSO, int CFDO,
- const unsigned* subregs, const unsigned subregsize,
- const unsigned* aliases, const unsigned aliasessize)
- : SubregHash(subregs), SubregHashSize(subregsize),
- AliasesHash(aliases), AliasesHashSize(aliasessize),
- Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
+ int CFSO, int CFDO)
+ : Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
RegClassBegin(RCB), RegClassEnd(RCE) {
assert(isPhysicalRegister(NumRegs) &&
"Target has too many physical registers!");
20 lib/Target/X86/X86ISelLowering.cpp
@@ -2271,6 +2271,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols must go through the PLT in PIC mode. If the symbol
@@ -2288,10 +2290,28 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
}
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
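The new RIP-relative path above only fires when the callee is a Function carrying the NonLazyBind attribute checked in the added condition. A minimal sketch, with a hypothetical module and function name, of how a client would set that attribute so LowerCall selects the GOT-indirect call:

#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/Attributes.h"
using namespace llvm;

// "external_fn" is a placeholder; any external function declaration works.
static void requestEagerBinding(Module &M) {
  if (Function *F = M.getFunction("external_fn"))
    F->addFnAttr(Attribute::NonLazyBind);  // the attribute this lowering keys on
}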
25 lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -437,12 +437,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
MemDepResult InstDep = MD->getDependency(Inst);
- // Ignore non-local store liveness.
+ // Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal() ||
- // Ignore self dependence, which happens in the entry block of the
- // function.
- InstDep.getInst() == Inst)
+ if (InstDep.isNonLocal() || InstDep.isUnknown())
continue;
// If we're storing the same value back to a pointer that we just
@@ -478,7 +475,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (Loc.Ptr == 0)
continue;
- while (!InstDep.isNonLocal()) {
+ while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
// end up depending on a may- or must-aliased load, then we can't optimize
@@ -542,24 +539,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
bool DSE::HandleFree(CallInst *F) {
+ bool MadeChange = false;
+
MemDepResult Dep = MD->getDependency(F);
- do {
- if (Dep.isNonLocal()) return false;
-
+
+ while (!Dep.isNonLocal() && !Dep.isUnknown()) {
Instruction *Dependency = Dep.getInst();
if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
- return false;
+ return MadeChange;
Value *DepPointer =
GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
- return false;
+ return MadeChange;
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
+ MadeChange = true;
// Inst's old Dependency is now deleted. Compute the next dependency,
// which may also be dead, as in
@@ -567,9 +566,9 @@ bool DSE::HandleFree(CallInst *F) {
// s[1] = 0; // This has just been deleted.
// free(s);
Dep = MD->getDependency(F);
- } while (!Dep.isNonLocal());
+ };
- return true;
+ return MadeChange;
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
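The rewritten HandleFree loop above keeps iterating as long as MemDep reports a usable local dependency, and now returns whatever progress it made instead of bailing out with false. A minimal sketch (hypothetical helper name) of the termination test it relies on:

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Instructions.h"
using namespace llvm;

// A dependency is worth inspecting only if MemDep pinned it to a local
// instruction; non-local and unknown results end the loop.
static bool hasLocalFreeDep(MemoryDependenceAnalysis &MD, CallInst *Free) {
  MemDepResult Dep = MD.getDependency(Free);
  return !Dep.isNonLocal() && !Dep.isUnknown();
}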
32 lib/Transforms/Scalar/GVN.cpp
@@ -227,21 +227,19 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
// Non-local case.
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
- // FIXME: call/call dependencies for readonly calls should return def, not
- // clobber! Move the checking logic to MemDep!
+ // FIXME: Move the checking logic to MemDep!
CallInst* cdep = 0;
// Check to see if we have a single dominating call instruction that is
// identical to C.
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
const NonLocalDepEntry *I = &deps[i];
- // Ignore non-local dependencies.
if (I->getResult().isNonLocal())
continue;
- // We don't handle non-depedencies. If we already have a call, reject
+ // We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (I->getResult().isClobber() || cdep != 0) {
+ if (!I->getResult().isDef() || cdep != 0) {
cdep = 0;
break;
}
@@ -1224,12 +1222,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
- Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
+ if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
- dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+ dbgs() << " has unknown dependencies\n";
);
return false;
}
@@ -1245,6 +1242,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DepInfo.isUnknown()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
if (DepInfo.isClobber()) {
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
@@ -1305,6 +1307,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
continue;
}
+ assert(DepInfo.isDef() && "Expecting def here");
+
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
@@ -1691,10 +1695,22 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+ if (Dep.isUnknown()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ dbgs() << " has unknown dependence\n";
+ );
+ return false;
+ }
+
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
return processNonLocalLoad(L);
+ assert(Dep.isDef() && "Expecting def here");
+
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
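processLoad and processNonLocalLoad now triage the new "unknown" result before anything else, then fall through to the non-local and def/clobber handling. A minimal sketch of that ordering as a standalone predicate (hypothetical helper name):

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
using namespace llvm;

// Mirrors the order of checks the patch adds: unknown means give up,
// non-local goes to the cross-block path, and what remains is a usable
// local def or clobber.
static bool hasUsableLocalDep(const MemDepResult &Dep) {
  if (Dep.isUnknown())  return false;
  if (Dep.isNonLocal()) return false;
  return Dep.isDef() || Dep.isClobber();
}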
20 lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -487,7 +487,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse()) {
+ if (!LI->isVolatile() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -496,17 +497,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- MemDepResult sdep = MD->getDependency(SI);
- if (!sdep.isNonLocal()) {
- bool FoundCall = false;
- for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
- if (&*I == C) {
- FoundCall = true;
- break;
- }
- }
- if (!FoundCall)
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
C = 0;
+ break;
+ }
}
}
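Instead of a second MemDep query, the replacement code above walks backwards from the store and asks alias analysis about each instruction directly. A minimal sketch of the per-instruction test it performs (hypothetical helper name):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Instructions.h"
using namespace llvm;

// True if I may read or write the memory location that SI stores to;
// any such instruction between the call and the store blocks the rewrite.
static bool touchesStoreLocation(AliasAnalysis &AA, Instruction *I,
                                 StoreInst *SI) {
  AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
  return AA.getModRefInfo(I, StoreLoc) != AliasAnalysis::NoModRef;
}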
1  lib/Transforms/Utils/CMakeLists.txt
@@ -5,7 +5,6 @@ add_llvm_library(LLVMTransformUtils
BreakCriticalEdges.cpp
BuildLibCalls.cpp
CloneFunction.cpp
- CloneLoop.cpp
CloneModule.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
128 lib/Transforms/Utils/CloneLoop.cpp
@@ -1,128 +0,0 @@
-//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneLoop interface which makes a copy of a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
-
-
-using namespace llvm;
-
-/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
-/// that the basic block is already cloned.
-static void CloneDominatorInfo(BasicBlock *BB,
- ValueToValueMapTy &VMap,
- DominatorTree *DT) {
-
- assert (DT && "DominatorTree is not available");
- ValueToValueMapTy::iterator BI = VMap.find(BB);
- assert (BI != VMap.end() && "BasicBlock clone is missing");
- BasicBlock *NewBB = cast<BasicBlock>(BI->second);
-
- // NewBB already got dominator info.
- if (DT->getNode(NewBB))
- return;
-
- assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
- // Entry block is not expected here. Infinite loops are not to cloned.
- assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
- BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
-
- // NewBB's dominator is either BB's dominator or BB's dominator's clone.
- BasicBlock *NewBBDom = BBDom;
- ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
- if (BBDomI != VMap.end()) {
- NewBBDom = cast<BasicBlock>(BBDomI->second);
- if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, VMap, DT);
- }
- DT->addNewBlock(NewBB, NewBBDom);
-}
-
-/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
-/// using old blocks to new blocks mapping.
-Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- ValueToValueMapTy &VMap, Pass *P) {
-
- DominatorTree *DT = NULL;
- if (P)
- DT = P->getAnalysisIfAvailable<DominatorTree>();
-
- SmallVector<BasicBlock *, 16> NewBlocks;
-
- // Populate loop nest.
- SmallVector<Loop *, 8> LoopNest;
- LoopNest.push_back(OrigL);
-
-
- Loop *NewParentLoop = NULL;
- do {
- Loop *L = LoopNest.pop_back_val();
- Loop *NewLoop = new Loop();
-
- if (!NewParentLoop)
- NewParentLoop = NewLoop;
-
- LPM->insertLoop(NewLoop, L->getParentLoop());
-
- // Clone Basic Blocks.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
- VMap[BB] = NewBB;
- if (P)
- LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
- NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- NewBlocks.push_back(NewBB);
- }
-
- // Clone dominator info.
- if (DT)
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- CloneDominatorInfo(BB, VMap, DT);
- }
-
- // Process sub loops
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- LoopNest.push_back(*I);
- } while (!LoopNest.empty());
-
- // Remap instructions to reference operands from VMap.
- for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
- NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
- BasicBlock *NB = *NBItr;
- for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
- BI != BE; ++BI) {
- Instruction *Insn = BI;
- for (unsigned index = 0, num_ops = Insn->getNumOperands();
- index != num_ops; ++index) {
- Value *Op = Insn->getOperand(index);
- ValueToValueMapTy::iterator OpItr = VMap.find(Op);
- if (OpItr != VMap.end())
- Insn->setOperand(index, OpItr->second);
- }
- }
- }
-
- BasicBlock *Latch = OrigL->getLoopLatch();
- Function *F = Latch->getParent();
- F->getBasicBlockList().insert(OrigL->getHeader(),
- NewBlocks.begin(), NewBlocks.end());
-
-
- return NewParentLoop;
-}
28 lib/Transforms/Utils/Local.cpp
@@ -706,39 +706,15 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
unsigned PrefAlign) {
+ V = V->stripPointerCasts();
- User *U = dyn_cast<User>(V);
- if (!U) return Align;
-
- switch (Operator::getOpcode(U)) {
- default: break;
- case Instruction::BitCast:
- return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- case Instruction::GetElementPtr: {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
- if (!isa<Constant>(*i) ||
- !cast<Constant>(*i)->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
-
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- }
- return Align;
- }
- case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(V);
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
return AI->getAlignment();
AI->setAlignment(PrefAlign);
return PrefAlign;
}
- }
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
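The manual BitCast/GetElementPtr recursion removed above is replaced by Value::stripPointerCasts(), which already skips bitcasts and all-zero GEPs. A minimal usage sketch (hypothetical helper name):

#include "llvm/Value.h"
using namespace llvm;

// stripPointerCasts() walks through bitcasts and GEPs whose indices are all
// zero, which is exactly what the deleted switch handled by hand.
static Value *baseForAlignment(Value *Ptr) {
  return Ptr->stripPointerCasts();
}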
20 lib/VMCore/AsmWriter.cpp
@@ -1401,7 +1401,25 @@ void AssemblyWriter::printModule(const Module *M) {
}
void AssemblyWriter::printNam