Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge branch 'master' of http://llvm.org/git/llvm into mico32

  • Loading branch information...
commit 867b0a9ddf7529ec2404d6546f0768cd410ba75c 2 parents 2a1ffcf + cff6193
jpbonn authored June 15, 2011

Showing 68 changed files with 1,071 additions and 875 deletions. Show diff stats Hide diff stats

  1. 39  docs/ProgrammersManual.html
  2. 7  include/llvm/ADT/APInt.h
  3. 2  include/llvm/ADT/PackedVector.h
  4. 11  include/llvm/Analysis/MemoryDependenceAnalysis.h
  5. 5  include/llvm/Attributes.h
  6. 6  include/llvm/CodeGen/ScheduleDAG.h
  7. 13  include/llvm/MC/MCContext.h
  8. 49  include/llvm/Target/TargetRegisterInfo.h
  9. 6  include/llvm/Transforms/Utils/Cloning.h
  10. 29  lib/Analysis/MemDepPrinter.cpp
  11. 40  lib/Analysis/MemoryDependenceAnalysis.cpp
  12. 7  lib/AsmParser/LLLexer.cpp
  13. 1  lib/AsmParser/LLParser.cpp
  14. 1  lib/AsmParser/LLToken.h
  15. 2  lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  16. 12  lib/CodeGen/ScheduleDAG.cpp
  17. 10  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  18. 3  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
  19. 2  lib/CodeGen/SelectionDAG/LegalizeTypes.h
  20. 56  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
  21. 2  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
  22. 33  lib/MC/MCParser/AsmParser.cpp
  23. 37  lib/Support/APInt.cpp
  24. 119  lib/Target/ARM/ARMISelLowering.cpp
  25. 4  lib/Target/ARM/ARMInstrInfo.td
  26. 4  lib/Target/ARM/ARMInstrThumb2.td
  27. 4  lib/Target/CBackend/CBackend.cpp
  28. 13  lib/Target/PowerPC/PPCISelLowering.cpp
  29. 90  lib/Target/PowerPC/PPCJITInfo.cpp
  30. 12  lib/Target/SystemZ/SystemZRegisterInfo.cpp
  31. 267  lib/Target/SystemZ/SystemZRegisterInfo.td
  32. 8  lib/Target/TargetRegisterInfo.cpp
  33. 20  lib/Target/X86/X86ISelLowering.cpp
  34. 25  lib/Transforms/Scalar/DeadStoreElimination.cpp
  35. 32  lib/Transforms/Scalar/GVN.cpp
  36. 20  lib/Transforms/Scalar/MemCpyOptimizer.cpp
  37. 1  lib/Transforms/Utils/CMakeLists.txt
  38. 128  lib/Transforms/Utils/CloneLoop.cpp
  39. 28  lib/Transforms/Utils/Local.cpp
  40. 20  lib/VMCore/AsmWriter.cpp
  41. 2  lib/VMCore/Attributes.cpp
  42. 9  runtime/libprofile/Makefile
  43. 13  test/CodeGen/ARM/bfi.ll
  44. 33  test/CodeGen/ARM/jumptable-label.ll
  45. 13  test/CodeGen/ARM/rev.ll
  46. 14  test/CodeGen/ARM/vpadd.ll
  47. 4  test/CodeGen/SystemZ/11-BSwap.ll
  48. 19  test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
  49. 17  test/CodeGen/X86/4char-promote.ll
  50. 27  test/CodeGen/X86/non-lazy-bind.ll
  51. 53  test/CodeGen/X86/shl_undef.ll
  52. 7  test/CodeGen/X86/tail-threshold.ll
  53. 2  test/Feature/paramattrs.ll
  54. 30  test/MC/X86/x86-64.s
  55. 115  unittests/ADT/PackedVectorTest.cpp
  56. 1  unittests/CMakeLists.txt
  57. 8  utils/TableGen/AsmMatcherEmitter.cpp
  58. 30  utils/TableGen/AsmWriterEmitter.cpp
  59. 3  utils/TableGen/CodeGenInstruction.cpp
  60. 103  utils/TableGen/CodeGenRegisters.cpp
  61. 69  utils/TableGen/CodeGenRegisters.h
  62. 21  utils/TableGen/CodeGenTarget.cpp
  63. 66  utils/TableGen/CodeGenTarget.h
  64. 4  utils/TableGen/DAGISelMatcherGen.cpp
  65. 2  utils/TableGen/FastISelEmitter.cpp
  66. 8  utils/TableGen/InstrInfoEmitter.cpp
  67. 103  utils/TableGen/RegisterInfoEmitter.cpp
  68. 2  utils/llvm.grm
39  docs/ProgrammersManual.html
@@ -64,6 +64,7 @@
64 64
       <li><a href="#dss_deque">&lt;deque&gt;</a></li>
65 65
       <li><a href="#dss_list">&lt;list&gt;</a></li>
66 66
       <li><a href="#dss_ilist">llvm/ADT/ilist.h</a></li>
  67
+      <li><a href="#dss_packedvector">llvm/ADT/PackedVector.h</a></li>
67 68
       <li><a href="#dss_other">Other Sequential Container Options</a></li>
68 69
     </ul></li>
69 70
     <li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
@@ -1069,6 +1070,44 @@
1069 1070
 
1070 1071
 <!-- _______________________________________________________________________ -->
1071 1072
 <h4>
  1073
+  <a name="dss_packedvector">llvm/ADT/PackedVector.h</a>
  1074
+</h4>
  1075
+
  1076
+<div>
  1077
+<p>
  1078
+Useful for storing a vector of values using only a small number of bits for each
  1079
+value. Apart from the standard operations of a vector-like container, it can
  1080
+also perform an 'or' set operation. 
  1081
+</p>
  1082
+
  1083
+<p>For example:</p>
  1084
+
  1085
+<div class="doc_code">
  1086
+<pre>
  1087
+enum State {
  1088
+    None = 0x0,
  1089
+    FirstCondition = 0x1,
  1090
+    SecondCondition = 0x2,
  1091
+    Both = 0x3
  1092
+};
  1093
+
  1094
+State get() {
  1095
+    PackedVector&lt;State, 2&gt; Vec1;
  1096
+    Vec1.push_back(FirstCondition);
  1097
+
  1098
+    PackedVector&lt;State, 2&gt; Vec2;
  1099
+    Vec2.push_back(SecondCondition);
  1100
+
  1101
+    Vec1 |= Vec2;
  1102
+    return Vec1[0]; // returns 'Both'.
  1103
+}
  1104
+</pre>
  1105
+</div>
  1106
+
  1107
+</div>
  1108
+
  1109
+<!-- _______________________________________________________________________ -->
  1110
+<h4>
1072 1111
   <a name="dss_ilist_traits">ilist_traits</a>
1073 1112
 </h4>
1074 1113
 
7  include/llvm/ADT/APInt.h
@@ -1241,18 +1241,19 @@ class APInt {
1241 1241
 
1242 1242
   /// toString - Converts an APInt to a string and append it to Str.  Str is
1243 1243
   /// commonly a SmallString.
1244  
-  void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed) const;
  1244
+  void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
  1245
+                bool formatAsCLiteral = false) const;
1245 1246
 
1246 1247
   /// Considers the APInt to be unsigned and converts it into a string in the
1247 1248
   /// radix given. The radix can be 2, 8, 10 or 16.
1248 1249
   void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1249  
-    toString(Str, Radix, false);
  1250
+    toString(Str, Radix, false, false);
1250 1251
   }
1251 1252
 
1252 1253
   /// Considers the APInt to be signed and converts it into a string in the
1253 1254
   /// radix given. The radix can be 2, 8, 10 or 16.
1254 1255
   void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1255  
-    toString(Str, Radix, true);
  1256
+    toString(Str, Radix, true, false);
1256 1257
   }
1257 1258
 
1258 1259
   /// toString - This returns the APInt as a std::string.  Note that this is an
2  include/llvm/ADT/PackedVector.h
@@ -90,7 +90,7 @@ class PackedVector : public PackedVectorBase<T, BitNum,
90 90
       Vec.setValue(Vec.Bits, Idx, val);
91 91
       return *this;
92 92
     }
93  
-    operator T() {
  93
+    operator T() const {
94 94
       return Vec.getValue(Vec.Bits, Idx);
95 95
     }
96 96
   };
11  include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -90,18 +90,27 @@ namespace llvm {
90 90
     /// get methods: These are static ctor methods for creating various
91 91
     /// MemDepResult kinds.
92 92
     static MemDepResult getDef(Instruction *Inst) {
  93
+      assert(Inst && "Def requires inst");
93 94
       return MemDepResult(PairTy(Inst, Def));
94 95
     }
95 96
     static MemDepResult getClobber(Instruction *Inst) {
  97
+      assert(Inst && "Clobber requires inst");
96 98
       return MemDepResult(PairTy(Inst, Clobber));
97 99
     }
98 100
     static MemDepResult getNonLocal() {
99 101
       return MemDepResult(PairTy(0, NonLocal));
100 102
     }
  103
+    static MemDepResult getUnknown() {
  104
+      return MemDepResult(PairTy(0, Clobber));
  105
+    }
101 106
 
102 107
     /// isClobber - Return true if this MemDepResult represents a query that is
103 108
     /// a instruction clobber dependency.
104  
-    bool isClobber() const { return Value.getInt() == Clobber; }
  109
+    bool isClobber() const { return Value.getInt() == Clobber && getInst(); }
  110
+
  111
+    /// isUnknown - Return true if this MemDepResult represents a query which
  112
+    /// cannot and/or will not be computed.
  113
+    bool isUnknown() const { return Value.getInt() == Clobber && !getInst(); }
105 114
 
106 115
     /// isDef - Return true if this MemDepResult represents a query that is
107 116
     /// a instruction definition dependency.
5  include/llvm/Attributes.h
@@ -69,6 +69,9 @@ const Attributes Hotpatch    = 1<<29;     ///< Function should have special
69 69
                                           ///'hotpatch' sequence in prologue
70 70
 const Attributes UWTable     = 1<<30;     ///< Function must be in a unwind
71 71
                                           ///table
  72
+const Attributes NonLazyBind = 1U<<31;    ///< Function is called early and/or
  73
+                                          ///  often, so lazy binding isn't
  74
+                                          ///  worthwhile.
72 75
 
73 76
 /// Note that uwtable is about the ABI or the user mandating an entry in the
74 77
 /// unwind table. The nounwind attribute is about an exception passing by the
@@ -90,7 +93,7 @@ const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
90 93
 const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
91 94
   NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
92 95
   NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment |
93  
-  Hotpatch | UWTable;
  96
+  Hotpatch | UWTable | NonLazyBind;
94 97
 
95 98
 /// @brief Parameter attributes that do not apply to vararg call arguments.
96 99
 const Attributes VarArgsIncompatible = StructRet;
6  include/llvm/CodeGen/ScheduleDAG.h
@@ -497,6 +497,12 @@ namespace llvm {
497 497
     SUnit EntrySU;                        // Special node for the region entry.
498 498
     SUnit ExitSU;                         // Special node for the region exit.
499 499
 
  500
+#ifdef NDEBUG
  501
+    static const bool StressSched = false;
  502
+#else
  503
+    bool StressSched;
  504
+#endif
  505
+
500 506
     explicit ScheduleDAG(MachineFunction &mf);
501 507
 
502 508
     virtual ~ScheduleDAG();
13  include/llvm/MC/MCContext.h
@@ -39,6 +39,9 @@ namespace llvm {
39 39
   class MCContext {
40 40
     MCContext(const MCContext&); // DO NOT IMPLEMENT
41 41
     MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
  42
+  public:
  43
+    typedef StringMap<MCSymbol*, BumpPtrAllocator&> SymbolTable;
  44
+  private:
42 45
 
43 46
     /// The MCAsmInfo for this target.
44 47
     const MCAsmInfo &MAI;
@@ -52,7 +55,7 @@ namespace llvm {
52 55
     BumpPtrAllocator Allocator;
53 56
 
54 57
     /// Symbols - Bindings of names to symbols.
55  
-    StringMap<MCSymbol*, BumpPtrAllocator&> Symbols;
  58
+    SymbolTable Symbols;
56 59
 
57 60
     /// UsedNames - Keeps track of names that were used both for user declared
58 61
     /// and artificial symbols.
@@ -142,6 +145,14 @@ namespace llvm {
142 145
     /// LookupSymbol - Get the symbol for \p Name, or null.
143 146
     MCSymbol *LookupSymbol(StringRef Name) const;
144 147
 
  148
+    /// getSymbols - Get a reference for the symbol table for clients that
  149
+    /// want to, for example, iterate over all symbols. 'const' because we
  150
+    /// still want any modifications to the table itself to use the MCContext
  151
+    /// APIs.
  152
+    const SymbolTable &getSymbols() const {
  153
+      return Symbols;
  154
+    }
  155
+
145 156
     /// @}
146 157
 
147 158
     /// @name Section Management
49  include/llvm/Target/TargetRegisterInfo.h
@@ -285,11 +285,6 @@ class TargetRegisterClass {
285 285
 /// descriptor.
286 286
 ///
287 287
 class TargetRegisterInfo {
288  
-protected:
289  
-  const unsigned* SubregHash;
290  
-  const unsigned SubregHashSize;
291  
-  const unsigned* AliasesHash;
292  
-  const unsigned AliasesHashSize;
293 288
 public:
294 289
   typedef const TargetRegisterClass * const * regclass_iterator;
295 290
 private:
@@ -307,11 +302,7 @@ class TargetRegisterInfo {
307 302
                      regclass_iterator RegClassEnd,
308 303
                      const char *const *subregindexnames,
309 304
                      int CallFrameSetupOpcode = -1,
310  
-                     int CallFrameDestroyOpcode = -1,
311  
-                     const unsigned* subregs = 0,
312  
-                     const unsigned subregsize = 0,
313  
-                     const unsigned* aliases = 0,
314  
-                     const unsigned aliasessize = 0);
  305
+                     int CallFrameDestroyOpcode = -1);
315 306
   virtual ~TargetRegisterInfo();
316 307
 public:
317 308
 
@@ -468,50 +459,28 @@ class TargetRegisterInfo {
468 459
   /// regsOverlap - Returns true if the two registers are equal or alias each
469 460
   /// other. The registers may be virtual registers.
470 461
   bool regsOverlap(unsigned regA, unsigned regB) const {
471  
-    if (regA == regB)
472  
-      return true;
473  
-
474  
-    if (regA > regB)
475  
-      std::swap(regA, regB);
476  
-
  462
+    if (regA == regB) return true;
477 463
     if (isVirtualRegister(regA) || isVirtualRegister(regB))
478 464
       return false;
479  
-
480  
-    // regA and regB are distinct physical registers. Do they alias?
481  
-    size_t index = (regA * 11 + regB * 97) & (AliasesHashSize-1);
482  
-    unsigned ProbeAmt = 1;
483  
-    while (AliasesHash[index*2] != 0 && AliasesHash[index*2+1] != 0) {
484  
-      if (AliasesHash[index*2] == regA && AliasesHash[index*2+1] == regB)
485  
-        return true;
486  
-
487  
-      index = (index + ProbeAmt) & (AliasesHashSize-1);
488  
-      ProbeAmt += 1;
  465
+    for (const unsigned *regList = getOverlaps(regA)+1; *regList; ++regList) {
  466
+      if (*regList == regB) return true;
489 467
     }
490  
-
491 468
     return false;
492 469
   }
493 470
 
494 471
   /// isSubRegister - Returns true if regB is a sub-register of regA.
495 472
   ///
496 473
   bool isSubRegister(unsigned regA, unsigned regB) const {
497  
-    // SubregHash is a simple quadratically probed hash table.
498  
-    size_t index = (regA * 11 + regB * 97) & (SubregHashSize-1);
499  
-    unsigned ProbeAmt = 1;
500  
-    while (SubregHash[index*2] != 0 && SubregHash[index*2+1] != 0) {
501  
-      if (SubregHash[index*2] == regA && SubregHash[index*2+1] == regB)
502  
-        return true;
503  
-
504  
-      index = (index + ProbeAmt) & (SubregHashSize-1);
505  
-      ProbeAmt += 1;
506  
-    }
507  
-
508  
-    return false;
  474
+    return isSuperRegister(regB, regA);
509 475
   }
510 476
 
511 477
   /// isSuperRegister - Returns true if regB is a super-register of regA.
512 478
   ///
513 479
   bool isSuperRegister(unsigned regA, unsigned regB) const {
514  
-    return isSubRegister(regB, regA);
  480
+    for (const unsigned *regList = getSuperRegisters(regA); *regList;++regList){
  481
+      if (*regList == regB) return true;
  482
+    }
  483
+    return false;
515 484
   }
516 485
 
517 486
   /// getCalleeSavedRegs - Return a null-terminated list of all of the
6  include/llvm/Transforms/Utils/Cloning.h
@@ -107,12 +107,6 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB,
107 107
                             const Twine &NameSuffix = "", Function *F = 0,
108 108
                             ClonedCodeInfo *CodeInfo = 0);
109 109
 
110  
-
111  
-/// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate
112  
-/// VMap using old blocks to new blocks mapping.
113  
-Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI, 
114  
-                ValueToValueMapTy &VMap, Pass *P);
115  
-
116 110
 /// CloneFunction - Return a copy of the specified function, but without
117 111
 /// embedding the function into another module.  Also, any references specified
118 112
 /// in the VMap are changed to refer to their mapped value instead of the
29  lib/Analysis/MemDepPrinter.cpp
@@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
79 79
 
80 80
     MemDepResult Res = MDA.getDependency(Inst);
81 81
     if (!Res.isNonLocal()) {
82  
-      assert(Res.isClobber() != Res.isDef() &&
83  
-             "Local dep should be def or clobber!");
  82
+      assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
  83
+              "Local dep should be unknown, def or clobber!");
84 84
       Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
85 85
                                                           Res.isClobber()),
86 86
                                        static_cast<BasicBlock *>(0)));
@@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
92 92
       for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
93 93
            I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
94 94
         const MemDepResult &Res = I->getResult();
95  
-        assert(Res.isClobber() != Res.isDef() &&
96  
-               "Resolved non-local call dep should be def or clobber!");
  95
+        assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
  96
+                "Resolved non-local call dep should be unknown, def or "
  97
+                "clobber!");
97 98
         InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
98 99
                                                           Res.isClobber()),
99 100
                                        I->getBB()));
@@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
148 149
       bool isClobber = I->first.getInt();
149 150
       const BasicBlock *DepBB = I->second;
150 151
 
151  
-      OS << "    " << (isClobber ? "Clobber" : "    Def");
  152
+      OS << "    ";
  153
+      if (!DepInst)
  154
+        OS << "Unknown";
  155
+      else if (isClobber)
  156
+        OS << "Clobber";
  157
+      else
  158
+        OS << "    Def";
152 159
       if (DepBB) {
153 160
         OS << " in block ";
154 161
         WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
155 162
       }
156  
-      OS << " from: ";
157  
-      if (DepInst == Inst)
158  
-        OS << "<unspecified>";
159  
-      else
160  
-        DepInst->print(OS);
  163
+      if (DepInst) {
  164
+        OS << " from: ";
  165
+        if (DepInst == Inst)
  166
+          OS << "<unspecified>";
  167
+        else
  168
+          DepInst->print(OS);
  169
+      }
161 170
       OS << "\n";
162 171
     }
163 172
 
40  lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -215,11 +215,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
215 215
     }
216 216
   }
217 217
   
218  
-  // No dependence found.  If this is the entry block of the function, it is a
219  
-  // clobber, otherwise it is non-local.
  218
+  // No dependence found.  If this is the entry block of the function, it is
  219
+  // unknown, otherwise it is non-local.
220 220
   if (BB != &BB->getParent()->getEntryBlock())
221 221
     return MemDepResult::getNonLocal();
222  
-  return MemDepResult::getClobber(ScanIt);
  222
+  return MemDepResult::getUnknown();
223 223
 }
224 224
 
225 225
 /// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -458,11 +458,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
458 458
     }
459 459
   }
460 460
   
461  
-  // No dependence found.  If this is the entry block of the function, it is a
462  
-  // clobber, otherwise it is non-local.
  461
+  // No dependence found.  If this is the entry block of the function, it is
  462
+  // unknown, otherwise it is non-local.
463 463
   if (BB != &BB->getParent()->getEntryBlock())
464 464
     return MemDepResult::getNonLocal();
465  
-  return MemDepResult::getClobber(ScanIt);
  465
+  return MemDepResult::getUnknown();
466 466
 }
467 467
 
468 468
 /// getDependency - Return the instruction on which a memory operation
@@ -490,12 +490,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
490 490
   
491 491
   // Do the scan.
492 492
   if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
493  
-    // No dependence found.  If this is the entry block of the function, it is a
494  
-    // clobber, otherwise it is non-local.
  493
+    // No dependence found.  If this is the entry block of the function, it is
  494
+    // unknown, otherwise it is non-local.
495 495
     if (QueryParent != &QueryParent->getParent()->getEntryBlock())
496 496
       LocalCache = MemDepResult::getNonLocal();
497 497
     else
498  
-      LocalCache = MemDepResult::getClobber(QueryInst);
  498
+      LocalCache = MemDepResult::getUnknown();
499 499
   } else {
500 500
     AliasAnalysis::Location MemLoc;
501 501
     AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -514,7 +514,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
514 514
                                              QueryParent);
515 515
     } else
516 516
       // Non-memory instruction.
517  
-      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
  517
+      LocalCache = MemDepResult::getUnknown();
518 518
   }
519 519
   
520 520
   // Remember the result!
@@ -648,10 +648,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
648 648
       Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
649 649
     } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
650 650
       // No dependence found.  If this is the entry block of the function, it is
651  
-      // a clobber, otherwise it is non-local.
  651
+      // unknown, otherwise it is non-local.
652 652
       Dep = MemDepResult::getNonLocal();
653 653
     } else {
654  
-      Dep = MemDepResult::getClobber(ScanPos);
  654
+      Dep = MemDepResult::getUnknown();
655 655
     }
656 656
     
657 657
     // If we had a dirty entry for the block, update it.  Otherwise, just add
@@ -707,7 +707,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
707 707
     return;
708 708
   Result.clear();
709 709
   Result.push_back(NonLocalDepResult(FromBB,
710  
-                                     MemDepResult::getClobber(FromBB->begin()),
  710
+                                     MemDepResult::getUnknown(),
711 711
                                      const_cast<Value *>(Loc.Ptr)));
712 712
 }
713 713
 
@@ -769,7 +769,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
769 769
   // If the block has a dependency (i.e. it isn't completely transparent to
770 770
   // the value), remember the reverse association because we just added it
771 771
   // to Cache!
772  
-  if (Dep.isNonLocal())
  772
+  if (Dep.isNonLocal() || Dep.isUnknown())
773 773
     return Dep;
774 774
   
775 775
   // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
@@ -1091,16 +1091,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
1091 1091
 
1092 1092
       // If getNonLocalPointerDepFromBB fails here, that means the cached
1093 1093
       // result conflicted with the Visited list; we have to conservatively
1094  
-      // assume a clobber, but this also does not block PRE of the load.
  1094
+      // assume it is unknown, but this also does not block PRE of the load.
1095 1095
       if (!CanTranslate ||
1096 1096
           getNonLocalPointerDepFromBB(PredPointer,
1097 1097
                                       Loc.getWithNewPtr(PredPtrVal),
1098 1098
                                       isLoad, Pred,
1099 1099
                                       Result, Visited)) {
1100 1100
         // Add the entry to the Result list.
1101  
-        NonLocalDepResult Entry(Pred,
1102  
-                                MemDepResult::getClobber(Pred->getTerminator()),
1103  
-                                PredPtrVal);
  1101
+        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1104 1102
         Result.push_back(Entry);
1105 1103
 
1106 1104
         // Since we had a phi translation failure, the cache for CacheKey won't
@@ -1145,8 +1143,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
1145 1143
     // results from the set".  Clear out the indicator for this.
1146 1144
     CacheInfo->Pair = BBSkipFirstBlockPair();
1147 1145
     
1148  
-    // If *nothing* works, mark the pointer as being clobbered by the first
1149  
-    // instruction in this block.
  1146
+    // If *nothing* works, mark the pointer as unknown.
1150 1147
     //
1151 1148
     // If this is the magic first block, return this as a clobber of the whole
1152 1149
     // incoming value.  Since we can't phi translate to one of the predecessors,
@@ -1161,8 +1158,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
1161 1158
       
1162 1159
       assert(I->getResult().isNonLocal() &&
1163 1160
              "Should only be here with transparent block");
1164  
-      I->setResult(MemDepResult::getClobber(BB->getTerminator()));
1165  
-      ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
  1161
+      I->setResult(MemDepResult::getUnknown());
1166 1162
       Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
1167 1163
                                          Pointer.getAddr()));
1168 1164
       break;
7  lib/AsmParser/LLLexer.cpp
@@ -422,13 +422,15 @@ static bool JustWhitespaceNewLine(const char *&Ptr) {
422 422
 ///    !
423 423
 lltok::Kind LLLexer::LexExclaim() {
424 424
   // Lex a metadata name as a MetadataVar.
425  
-  if (isalpha(CurPtr[0])) {
  425
+  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
  426
+      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
426 427
     ++CurPtr;
427 428
     while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
428  
-           CurPtr[0] == '.' || CurPtr[0] == '_')
  429
+           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
429 430
       ++CurPtr;
430 431
 
431 432
     StrVal.assign(TokStart+1, CurPtr);   // Skip !
  433
+    UnEscapeLexed(StrVal);
432 434
     return lltok::MetadataVar;
433 435
   }
434 436
   return lltok::exclaim;
@@ -570,6 +572,7 @@ lltok::Kind LLLexer::LexIdentifier() {
570 572
   KEYWORD(noimplicitfloat);
571 573
   KEYWORD(naked);
572 574
   KEYWORD(hotpatch);
  575
+  KEYWORD(nonlazybind);
573 576
 
574 577
   KEYWORD(type);
575 578
   KEYWORD(opaque);
1  lib/AsmParser/LLParser.cpp
@@ -985,6 +985,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
985 985
     case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
986 986
     case lltok::kw_naked:           Attrs |= Attribute::Naked; break;
987 987
     case lltok::kw_hotpatch:        Attrs |= Attribute::Hotpatch; break;
  988
+    case lltok::kw_nonlazybind:     Attrs |= Attribute::NonLazyBind; break;
988 989
 
989 990
     case lltok::kw_alignstack: {
990 991
       unsigned Alignment;
1  lib/AsmParser/LLToken.h
@@ -99,6 +99,7 @@ namespace lltok {
99 99
     kw_noimplicitfloat,
100 100
     kw_naked,
101 101
     kw_hotpatch,
  102
+    kw_nonlazybind,
102 103
 
103 104
     kw_type,
104 105
     kw_opaque,
2  lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1948,6 +1948,8 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
1948 1948
     for (MachineInstr::mop_iterator OI = MI.operands_begin(),
1949 1949
            OE = MI.operands_end(); OI != OE; ++OI) {
1950 1950
       const MachineOperand& OP = *OI;
  1951
+      if (OP.isJTI())
  1952
+        return false;
1951 1953
       if (OP.isMBB() && OP.getMBB() == MBB)
1952 1954
         return false;
1953 1955
     }
12  lib/CodeGen/ScheduleDAG.cpp
@@ -19,17 +19,27 @@
19 19
 #include "llvm/Target/TargetMachine.h"
20 20
 #include "llvm/Target/TargetInstrInfo.h"
21 21
 #include "llvm/Target/TargetRegisterInfo.h"
  22
+#include "llvm/Support/CommandLine.h"
22 23
 #include "llvm/Support/Debug.h"
23 24
 #include "llvm/Support/raw_ostream.h"
24 25
 #include <climits>
25 26
 using namespace llvm;
26 27
 
  28
+#ifndef NDEBUG
  29
+cl::opt<bool> StressSchedOpt(
  30
+  "stress-sched", cl::Hidden, cl::init(false),
  31
+  cl::desc("Stress test instruction scheduling"));
  32
+#endif
  33
+
27 34
 ScheduleDAG::ScheduleDAG(MachineFunction &mf)
28 35
   : TM(mf.getTarget()),
29 36
     TII(TM.getInstrInfo()),
30 37
     TRI(TM.getRegisterInfo()),
31 38
     MF(mf), MRI(mf.getRegInfo()),
32 39
     EntrySU(), ExitSU() {
  40
+#ifndef NDEBUG
  41
+  StressSched = StressSchedOpt;
  42
+#endif
33 43
 }
34 44
 
35 45
 ScheduleDAG::~ScheduleDAG() {}
@@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
307 317
       if (I->isArtificial())
308 318
         dbgs() << " *";
309 319
       dbgs() << ": Latency=" << I->getLatency();
  320
+      if (I->isAssignedRegDep())
  321
+        dbgs() << " Reg=" << G->TRI->getName(I->getReg());
310 322
       dbgs() << "\n";
311 323
     }
312 324
   }
10  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3030,6 +3030,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
3030 3030
   // fold (shl x, 0) -> x
3031 3031
   if (N1C && N1C->isNullValue())
3032 3032
     return N0;
  3033
+  // fold (shl undef, x) -> 0
  3034
+  if (N0.getOpcode() == ISD::UNDEF)
  3035
+    return DAG.getConstant(0, VT);
3033 3036
   // if (shl x, c) is known to be zero, return 0
3034 3037
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
3035 3038
                             APInt::getAllOnesValue(OpSizeInBits)))
@@ -6425,14 +6428,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
6425 6428
 
6426 6429
   // FIXME: is there such a thing as a truncating indexed store?
6427 6430
   if (ST->isTruncatingStore() && ST->isUnindexed() &&
6428  
-      Value.getValueType().isInteger() && !Value.getValueType().isVector()) {
  6431
+      Value.getValueType().isInteger()) {
6429 6432
     // See if we can simplify the input to this truncstore with knowledge that
6430 6433
     // only the low bits are being used.  For example:
6431 6434
     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
6432 6435
     SDValue Shorter =
6433 6436
       GetDemandedBits(Value,
6434  
-                      APInt::getLowBitsSet(Value.getValueSizeInBits(),
6435  
-                                           ST->getMemoryVT().getSizeInBits()));
  6437
+                      APInt::getLowBitsSet(
  6438
+                        Value.getValueType().getScalarType().getSizeInBits(),
  6439
+                        ST->getMemoryVT().getScalarType().getSizeInBits()));
6436 6440
     AddToWorkList(Value.getNode());
6437 6441
     if (Shorter.getNode())
6438 6442
       return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
3  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -972,7 +972,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
972 972
   DebugLoc dl = N->getDebugLoc();
973 973
   SDValue Op = GetPromotedInteger(N->getOperand(0));
974 974
   Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
975  
-  return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
  975
+  return DAG.getZeroExtendInReg(Op, dl,
  976
+                                N->getOperand(0).getValueType().getScalarType());
976 977
 }
977 978
 
978 979
 
2  lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -201,7 +201,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
201 201
     EVT OldVT = Op.getValueType();
202 202
     DebugLoc dl = Op.getDebugLoc();
203 203
     Op = GetPromotedInteger(Op);
204  
-    return DAG.getZeroExtendInReg(Op, dl, OldVT);
  204
+    return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
205 205
   }
206 206
 
207 207
   // Integer Result Promotion.
56  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1369,6 +1369,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1369 1369
   bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1370 1370
 };
1371 1371
 
  1372
+#ifndef NDEBUG
  1373
+template<class SF>
  1374
+struct reverse_sort : public queue_sort {
  1375
+  SF &SortFunc;
  1376
+  reverse_sort(SF &sf) : SortFunc(sf) {}
  1377
+  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
  1378
+
  1379
+  bool operator()(SUnit* left, SUnit* right) const {
  1380
+    // reverse left/right rather than simply !SortFunc(left, right)
  1381
+    // to expose different paths in the comparison logic.
  1382
+    return SortFunc(right, left);
  1383
+  }
  1384
+};
  1385
+#endif // NDEBUG
  1386
+
1372 1387
 /// bu_ls_rr_sort - Priority function for bottom up register pressure
1373 1388
 // reduction scheduler.
1374 1389
 struct bu_ls_rr_sort : public queue_sort {
@@ -1569,20 +1584,33 @@ class RegReductionPQBase : public SchedulingPriorityQueue {
1569 1584
 };
1570 1585
 
1571 1586
 template<class SF>
1572  
-class RegReductionPriorityQueue : public RegReductionPQBase {
1573  
-  static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
1574  
-    std::vector<SUnit *>::iterator Best = Q.begin();
1575  
-    for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
1576  
-           E = Q.end(); I != E; ++I)
1577  
-      if (Picker(*Best, *I))
1578  
-        Best = I;
1579  
-    SUnit *V = *Best;
1580  
-    if (Best != prior(Q.end()))
1581  
-      std::swap(*Best, Q.back());
1582  
-    Q.pop_back();
1583  
-    return V;
  1587
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  1588
+  std::vector<SUnit *>::iterator Best = Q.begin();
  1589
+  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
  1590
+         E = Q.end(); I != E; ++I)
  1591
+    if (Picker(*Best, *I))
  1592
+      Best = I;
  1593
+  SUnit *V = *Best;
  1594
+  if (Best != prior(Q.end()))
  1595
+    std::swap(*Best, Q.back());
  1596
+  Q.pop_back();
  1597
+  return V;
  1598
+}
  1599
+
  1600
+template<class SF>
  1601
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
  1602
+#ifndef NDEBUG
  1603
+  if (DAG->StressSched) {
  1604
+    reverse_sort<SF> RPicker(Picker);
  1605
+    return popFromQueueImpl(Q, RPicker);
1584 1606
   }
  1607
+#endif
  1608
+  (void)DAG;
  1609
+  return popFromQueueImpl(Q, Picker);
  1610
+}
1585 1611
 
  1612
+template<class SF>
  1613
+class RegReductionPriorityQueue : public RegReductionPQBase {
1586 1614
   SF Picker;
1587 1615
 
1588 1616
 public:
@@ -1603,7 +1631,7 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
1603 1631
   SUnit *pop() {
1604 1632
     if (Queue.empty()) return NULL;
1605 1633
 
1606  
-    SUnit *V = popFromQueue(Queue, Picker);
  1634
+    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1607 1635
     V->NodeQueueId = 0;
1608 1636
     return V;
1609 1637
   }
@@ -1613,7 +1641,7 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
1613 1641
     std::vector<SUnit*> DumpQueue = Queue;
1614 1642
     SF DumpPicker = Picker;
1615 1643
     while (!DumpQueue.empty()) {
1616  
-      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
  1644
+      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1617 1645
       if (isBottomUp())
1618 1646
         dbgs() << "Height " << SU->getHeight() << ": ";
1619 1647
       else
2  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -435,7 +435,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
435 435
         // it requires a cross class copy (cost < 0). That means we are only
436 436
         // treating "expensive to copy" register dependency as physical register
437 437
         // dependency. This may change in the future though.
438  
-        if (Cost >= 0)
  438
+        if (Cost >= 0 && !StressSched)
439 439
           PhysReg = 0;
440 440
 
441 441
         // If this is a ctrl dep, latency is 1.
33  lib/MC/MCParser/AsmParser.cpp
@@ -84,6 +84,7 @@ class AsmParser : public MCAsmParser {
84 84
   AsmLexer Lexer;
85 85
   MCContext &Ctx;
86 86
   MCStreamer &Out;
  87
+  const MCAsmInfo &MAI;
87 88
   SourceMgr &SrcMgr;
88 89
   MCAsmParserExtension *GenericParser;
89 90
   MCAsmParserExtension *PlatformParser;
@@ -135,7 +136,7 @@ class AsmParser : public MCAsmParser {
135 136
   virtual MCContext &getContext() { return Ctx; }
136 137
   virtual MCStreamer &getStreamer() { return Out; }
137 138
 
138  
-  virtual bool Warning(SMLoc L, const Twine &Meg);
  139
+  virtual bool Warning(SMLoc L, const Twine &Msg);
139 140
   virtual bool Error(SMLoc L, const Twine &Msg);
140 141
 
141 142
   const AsmToken &Lex();
@@ -160,8 +161,9 @@ class AsmParser : public MCAsmParser {
160 161
   void HandleMacroExit();
161 162
 
162 163
   void PrintMacroInstantiations();
163  
-  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
164  
-    SrcMgr.PrintMessage(Loc, Msg, Type);
  164
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
  165
+                    bool ShowLine = true) const {
  166
+    SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
165 167
   }
166 168
 
167 169
   /// EnterIncludeFile - Enter the specified file. This returns true on failure.
@@ -337,7 +339,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
337 339
 
338 340
 AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
339 341
                      MCStreamer &_Out, const MCAsmInfo &_MAI)
340  
-  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
  342
+  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
341 343
     GenericParser(new GenericAsmParser), PlatformParser(0),
342 344
     CurBuffer(0), MacrosEnabled(true) {
343 345
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
@@ -466,6 +468,29 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
466 468
       TokError("unassigned file number: " + Twine(i) + " for .file directives");
467 469
   }
468 470
 
  471
+  // Check to see that all assembler local symbols were actually defined.
  472
+  // Targets that don't do subsections via symbols may not want this, though,
  473
+  // so conservatively exclude them. Only do this if we're finalizing, though,
  474
+  // as otherwise we won't necessarily have seen everything yet.
  475
+  if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
  476
+    const MCContext::SymbolTable &Symbols = getContext().getSymbols();
  477
+    for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
  478
+         e = Symbols.end();
  479
+         i != e; ++i) {
  480
+      MCSymbol *Sym = i->getValue();
  481
+      // Variable symbols may not be marked as defined, so check those
  482
+      // explicitly. If we know it's a variable, we have a definition for
  483
+      // the purposes of this check.
  484
+      if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
  485
+        // FIXME: We would really like to refer back to where the symbol was
  486
+        // first referenced for a source location. We need to add something
  487
+        // to track that. Currently, we just point to the end of the file.
  488
+        PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
  489
+                     Sym->getName() + "' not defined", "error", false);
  490
+    }
  491
+  }
  492
+
  493
+
469 494
   // Finalize the output stream if there are no errors and if the client wants
470 495
   // us to.
471 496
   if (!HadError && !NoFinalize)
37  lib/Support/APInt.cpp
@@ -2164,12 +2164,33 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2164 2164
 }
2165 2165
 
2166 2166
 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2167  
-                     bool Signed) const {
  2167
+                     bool Signed, bool formatAsCLiteral) const {
2168 2168
   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
2169 2169
          "Radix should be 2, 8, 10, or 16!");
2170 2170
 
  2171
+  const char *Prefix = "";
  2172
+  if (formatAsCLiteral) {
  2173
+    switch (Radix) {
  2174
+      case 2:
  2175
+        // Binary literals are a non-standard extension added in gcc 4.3:
  2176
+        // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
  2177
+        Prefix = "0b";
  2178
+        break;
  2179
+      case 8:
  2180
+        Prefix = "0";
  2181
+        break;
  2182
+      case 16:
  2183
+        Prefix = "0x";
  2184
+        break;
  2185
+    }
  2186
+  }
  2187
+
2171 2188
   // First, check for a zero value and just short circuit the logic below.
2172 2189
   if (*this == 0) {
  2190
+    while (*Prefix) {
  2191
+      Str.push_back(*Prefix);
  2192
+      ++Prefix;
  2193
+    };
2173 2194
     Str.push_back('0');
2174 2195
     return;
2175 2196
   }
@@ -2193,6 +2214,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2193 2214
       }
2194 2215
     }
2195 2216
 
  2217
+    while (*Prefix) {
  2218
+      Str.push_back(*Prefix);
  2219
+      ++Prefix;
  2220
+    };
  2221
+
2196 2222
     while (N) {
2197 2223
       *--BufPtr = Digits[N % Radix];
2198 2224
       N /= Radix;
@@ -2212,6 +2238,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2212 2238
     Str.push_back('-');
2213 2239
   }
2214 2240
 
  2241
+  while (*Prefix) {
  2242
+    Str.push_back(*Prefix);
  2243
+    ++Prefix;
  2244
+  };
  2245
+
2215 2246
   // We insert the digits backward, then reverse them to get the right order.
2216 2247
   unsigned StartDig = Str.size();
2217 2248
 
@@ -2251,7 +2282,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2251 2282
 /// to the methods above.
2252 2283
 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2253 2284
   SmallString<40> S;
2254  
-  toString(S, Radix, Signed);
  2285
+  toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2255 2286
   return S.str();
2256 2287
 }
2257 2288
 
@@ -2266,7 +2297,7 @@ void APInt::dump() const {
2266 2297
 
2267 2298
 void APInt::print(raw_ostream &OS, bool isSigned) const {
2268 2299
   SmallString<40> S;
2269  
-  this->toString(S, 10, isSigned);
  2300
+  this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2270 2301
   OS << S.str();
2271 2302
 }
2272 2303
 
119  lib/Target/ARM/ARMISelLowering.cpp
@@ -5523,12 +5523,109 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
5523 5523
   return SDValue();
5524 5524
 }
5525 5525
 
  5526
+// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction 
  5527
+// (only after legalization).
  5528
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
  5529
+                                 TargetLowering::DAGCombinerInfo &DCI,
  5530
+                                 const ARMSubtarget *Subtarget) {
  5531
+
  5532
+  // Only perform optimization if after legalize, and if NEON is available. We
  5533
+  // also expect both operands to be BUILD_VECTORs.
  5534
+  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
  5535
+      || N0.getOpcode() != ISD::BUILD_VECTOR
  5536
+      || N1.getOpcode() != ISD::BUILD_VECTOR)
  5537
+    return SDValue();
  5538
+
  5539
+  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
  5540
+  EVT VT = N->getValueType(0);
  5541
+  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
  5542
+    return SDValue();
  5543
+
  5544
+  // Check that the vector operands are of the right form.
  5545
+  // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
  5546
+  // operands, where N is the size of the formed vector.
  5547
+  // Each EXTRACT_VECTOR should have the same input vector and odd or even
  5548
+  // index such that we have a pair wise add pattern.
  5549
+
  5550
+  // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
  5551
+  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
  5552
+    return SDValue();
  5553
+  SDValue Vec = N0->getOperand(0)->getOperand(0);
  5554
+  SDNode *V = Vec.getNode();
  5555
+  unsigned nextIndex = 0;
  5556
+
  5557
+  // For each operand of the ADD (both of which are BUILD_VECTORs),
  5558
+  // check to see if each of their operands are an EXTRACT_VECTOR with
  5559
+  // the same vector and appropriate index.
  5560
+  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
  5561
+    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
  5562
+        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
  5563
+      
  5564
+      SDValue ExtVec0 = N0->getOperand(i);
  5565
+      SDValue ExtVec1 = N1->getOperand(i);
  5566
+      
  5567
+      // First operand is the vector, verify it's the same.
  5568
+      if (V != ExtVec0->getOperand(0).getNode() ||
  5569
+          V != ExtVec1->getOperand(0).getNode())
  5570
+        return SDValue();
  5571
+      
  5572
+      // Second is the constant, verify it's correct.
  5573
+      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
  5574
+      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
  5575
+      
  5576
+      // For the constant, we want to see all the even or all the odd.
  5577
+      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
  5578
+          || C1->getZExtValue() != nextIndex+1)
  5579
+        return SDValue();
  5580
+
  5581
+      // Increment index.
  5582
+      nextIndex+=2;
  5583
+    } else 
  5584
+      return SDValue();
  5585
+  }
  5586
+
  5587
+  // Create VPADDL node.
  5588
+  SelectionDAG &DAG = DCI.DAG;
  5589
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  5590
+  DebugLoc DL = N->getDebugLoc();
  5591
+
  5592
+  // Build operand list.
  5593
+  SmallVector<SDValue, 8> Ops;
  5594
+  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
  5595
+                                TLI.getPointerTy()));
  5596
+
  5597
+  // Input is the vector.
  5598
+  Ops.push_back(Vec);
  5599
+  
  5600
+  // Get widened type and narrowed type.
  5601
+  MVT widenType;
  5602
+  unsigned numElem = VT.getVectorNumElements();
  5603
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
  5604
+    case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
  5605
+    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
  5606
+    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
  5607
+    default:
  5608
+      assert(0 && "Invalid vector element type for padd optimization.");
  5609
+  }
  5610
+
  5611
+  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
  5612
+                            widenType, &Ops[0], Ops.size());
  5613
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
  5614
+}
  5615
+
5526 5616
 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
5527 5617
 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
5528 5618
 /// called with the default operands, and if that fails, with commuted
5529 5619
 /// operands.
5530 5620
 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
5531  
-                                         TargetLowering::DAGCombinerInfo &DCI) {
  5621
+                                          TargetLowering::DAGCombinerInfo &DCI,
  5622
+                                          const ARMSubtarget *Subtarget){
  5623
+
  5624
+  // Attempt to create vpaddl for this add.
  5625
+  SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
  5626
+  if (Result.getNode())
  5627
+    return Result;
  5628
+  
5532 5629
   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
5533 5630
   if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
5534 5631
     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
@@ -5540,17 +5637,18 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
5540 5637
 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
5541 5638
 ///
5542 5639
 static SDValue PerformADDCombine(SDNode *N,
5543  
-                                 TargetLowering::DAGCombinerInfo &DCI) {
  5640
+                                 TargetLowering::DAGCombinerInfo &DCI,
  5641
+                                 const ARMSubtarget *Subtarget) {
5544 5642
   SDValue N0 = N->getOperand(0);
5545 5643
   SDValue N1 = N->getOperand(1);
5546 5644
 
5547 5645
   // First try with the default operand order.
5548  
-  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
  5646
+  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
5549 5647
   if (Result.getNode())
5550 5648
     return Result;
5551 5649
 
5552 5650
   // If that didn't work, try again with the operands commuted.
5553  
-  return PerformADDCombineWithOperands(N, N1, N0, DCI);
  5651
+  return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
5554 5652
 }
5555 5653
 
5556 5654
 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
@@ -5875,8 +5973,8 @@ static SDValue PerformORCombine(SDNode *N,
5875 5973
   return SDValue();
5876 5974
 }
5877 5975
 
5878  
-/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
5879  
-/// C1 & C2 == C1.
  5976
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
  5977
+/// the bits being cleared by the AND are not demanded by the BFI.
5880 5978
 static SDValue PerformBFICombine(SDNode *N,
5881 5979
                                  TargetLowering::DAGCombinerInfo &DCI) {
5882 5980
   SDValue N1 = N->getOperand(1);
@@ -5884,9 +5982,12 @@ static SDValue PerformBFICombine(SDNode *N,
5884 5982
     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5885 5983
     if (!N11C)
5886 5984
       return SDValue();
5887  
-    unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  5985
+    unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
  5986
+    unsigned LSB = CountTrailingZeros_32(~InvMask);
  5987
+    unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
  5988
+    unsigned Mask = (1 << Width)-1;
5888 5989
     unsigned Mask2 = N11C->getZExtValue();
5889  
-    if ((Mask & Mask2) == Mask2)
  5990
+    if ((Mask & (~Mask2)) == 0)
5890 5991
       return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
5891 5992
                              N->getOperand(0), N1.getOperand(0),
5892 5993
                              N->getOperand(2));
@@ -6755,7 +6856,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
6755 6856
                                              DAGCombinerInfo &DCI) const {
6756 6857
   switch (N->getOpcode()) {
6757 6858
   default: break;
6758  
-  case ISD::ADD:        return PerformADDCombine(N, DCI);
  6859
+  case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
6759 6860
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
6760 6861
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
6761 6862
   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
4  lib/Target/ARM/ARMInstrInfo.td
@@ -3029,6 +3029,10 @@ def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
3029 3029
                                (shl GPR:$Rm, (i32 8))), i16),
3030 3030
                (REVSH GPR:$Rm)>;
3031 3031
 
  3032
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
  3033
+                   (and (srl GPR:$Rm, (i32 8)), 0xFF)),
  3034
+               (REVSH GPR:$Rm)>;
  3035
+
3032 3036
 // Need the AddedComplexity or else MOVs + REV would be chosen.
3033 3037
 let AddedComplexity = 5 in
3034 3038
 def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
4  lib/Target/ARM/ARMInstrThumb2.td
@@ -2604,6 +2604,10 @@ def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
2604 2604
                             (shl rGPR:$Rm, (i32 8))), i16),
2605 2605
             (t2REVSH rGPR:$Rm)>;
2606 2606
 
  2607
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
  2608
+                   (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
  2609
+            (t2REVSH rGPR:$Rm)>;
  2610
+
2607 2611
 def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
2608 2612
 
2609 2613
 def t2PKHBT : T2ThreeReg<
4  lib/Target/CBackend/CBackend.cpp
@@ -278,7 +278,7 @@ namespace {
278 278
       return AI;
279