diff --git a/clang/include/clang/CodeGen/SwiftCallingConv.h b/clang/include/clang/CodeGen/SwiftCallingConv.h new file mode 100644 index 0000000000000..f9c2fd94ca8d7 --- /dev/null +++ b/clang/include/clang/CodeGen/SwiftCallingConv.h @@ -0,0 +1,168 @@ +//==-- SwiftCallingConv.h - Swift ABI lowering -----------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines constants and types related to Swift ABI lowering. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CODEGEN_SWIFTCALLINGCONV_H +#define LLVM_CLANG_CODEGEN_SWIFTCALLINGCONV_H + +#include "clang/AST/CanonicalType.h" +#include "clang/AST/CharUnits.h" +#include "clang/AST/Type.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/TrailingObjects.h" +#include + +namespace llvm { + class IntegerType; + class Type; + class StructType; + class VectorType; +} + +namespace clang { +class Decl; +class FieldDecl; +class ASTRecordLayout; + +namespace CodeGen { +class ABIArgInfo; +class CodeGenModule; +class CGFunctionInfo; + +namespace swiftcall { + +class SwiftAggLowering { + CodeGenModule &CGM; + + struct StorageEntry { + CharUnits Begin; + CharUnits End; + llvm::Type *Type; + + CharUnits getWidth() const { + return End - Begin; + } + }; + SmallVector Entries; + bool Finished = false; + +public: + SwiftAggLowering(CodeGenModule &CGM) : CGM(CGM) {} + + void addOpaqueData(CharUnits begin, CharUnits end) { + addEntry(nullptr, begin, end); + } + + void addTypedData(QualType type, CharUnits begin); + void addTypedData(const RecordDecl *record, CharUnits begin); + void addTypedData(const RecordDecl *record, CharUnits begin, + const ASTRecordLayout &layout); + void addTypedData(llvm::Type *type, CharUnits begin); + void addTypedData(llvm::Type *type, CharUnits begin, CharUnits end); + + void finish(); + + /// Does this lowering require passing any data? + bool empty() const { + assert(Finished && "didn't finish lowering before calling empty()"); + return Entries.empty(); + } + + /// According to the target Swift ABI, should a value with this lowering + /// be passed indirectly? + /// + /// Note that this decision is based purely on the data layout of the + /// value and does not consider whether the type is address-only, + /// must be passed indirectly to match a function abstraction pattern, or + /// anything else that is expected to be handled by high-level lowering. + /// + /// \param asReturnValue - if true, answer whether it should be passed + /// indirectly as a return value; if false, answer whether it should be + /// passed indirectly as an argument + bool shouldPassIndirectly(bool asReturnValue) const; + + using EnumerationCallback = + llvm::function_ref; + + /// Enumerate the expanded components of this type. + /// + /// The component types will always be legal vector, floating-point, + /// integer, or pointer types. + void enumerateComponents(EnumerationCallback callback) const; + + /// Return the types for a coerce-and-expand operation. + /// + /// The first type matches the memory layout of the data that's been + /// added to this structure, including explicit [N x i8] arrays for any + /// internal padding. + /// + /// The second type removes any internal padding members and, if only + /// one element remains, is simply that element type. 
+ std::pair getCoerceAndExpandTypes() const; + +private: + void addBitFieldData(const FieldDecl *field, CharUnits begin, + uint64_t bitOffset); + void addLegalTypedData(llvm::Type *type, CharUnits begin, CharUnits end); + void addEntry(llvm::Type *type, CharUnits begin, CharUnits end); + void splitVectorEntry(unsigned index); +}; + +/// Return the maximum voluntary integer size for the current target. +CharUnits getMaximumVoluntaryIntegerSize(CodeGenModule &CGM); + +/// Return the Swift CC's notion of the natural alignment of a type. +CharUnits getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type); + +/// Is the given integer type "legal" for Swift's perspective on the +/// current platform? +bool isLegalIntegerType(CodeGenModule &CGM, llvm::IntegerType *type); + +/// Is the given vector type "legal" for Swift's perspective on the +/// current platform? +bool isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy); +bool isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::Type *eltTy, unsigned numElts); + +/// Minimally split a legal vector type. +std::pair +splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy); + +/// Turn a vector type in a sequence of legal component vector types. +/// +/// The caller may assume that the sum of the data sizes of the resulting +/// types will equal the data size of the vector type. +void legalizeVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy, + llvm::SmallVectorImpl &types); + +/// Should a C++ record type be passed and returned indirectly? +bool shouldPassCXXRecordIndirectly(CodeGenModule &CGM, + const CXXRecordDecl *record); + +/// Classify the rules for how to return a particular type. +ABIArgInfo classifyReturnType(CodeGenModule &CGM, CanQualType type); + +/// Classify the rules for how to pass a particular type. +ABIArgInfo classifyArgumentType(CodeGenModule &CGM, CanQualType type); + +/// Compute the ABI information of a swiftcall function. This is a +/// private interface for Clang. +void computeABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI); + +} // end namespace swiftcall +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h index 24410715df68e..530a7ef560c5b 100644 --- a/clang/lib/CodeGen/ABIInfo.h +++ b/clang/lib/CodeGen/ABIInfo.h @@ -18,20 +18,25 @@ namespace llvm { class Value; class LLVMContext; class DataLayout; + class Type; } namespace clang { class ASTContext; class TargetInfo; - namespace CodeGen { - class ABIArgInfo; - class Address; - class CGCXXABI; - class CGFunctionInfo; - class CodeGenFunction; - class CodeGenTypes; - } +namespace CodeGen { + class ABIArgInfo; + class Address; + class CGCXXABI; + class CGFunctionInfo; + class CodeGenFunction; + class CodeGenTypes; + class SwiftABIInfo; + +namespace swiftcall { + class SwiftAggLowering; +} // FIXME: All of this stuff should be part of the target interface // somehow. It is currently here because it is not clear how to factor @@ -55,6 +60,8 @@ namespace clang { virtual ~ABIInfo(); + virtual bool supportsSwift() const { return false; } + CodeGen::CGCXXABI &getCXXABI() const; ASTContext &getContext() const; llvm::LLVMContext &getVMContext() const; @@ -112,7 +119,35 @@ namespace clang { CodeGen::ABIArgInfo getNaturalAlignIndirectInReg(QualType Ty, bool Realign = false) const; + + }; + + /// A refining implementation of ABIInfo for targets that support swiftcall. 
+ /// + /// If we find ourselves wanting multiple such refinements, they'll probably + /// be independent refinements, and we should probably find another way + /// to do it than simple inheritance. + class SwiftABIInfo : public ABIInfo { + public: + SwiftABIInfo(CodeGen::CodeGenTypes &cgt) : ABIInfo(cgt) {} + + bool supportsSwift() const final override { return true; } + + virtual bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef types, + bool asReturnValue) const = 0; + + virtual bool isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const; + + static bool classof(const ABIInfo *info) { + return info->supportsSwift(); + } + }; + +} // end namespace CodeGen } // end namespace clang #endif diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 0693949e4366e..cbd7422e6aae7 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -26,6 +26,7 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Attributes.h" @@ -59,6 +60,7 @@ static unsigned ClangCallConvToLLVMCallConv(CallingConv CC) { case CC_SpirKernel: return llvm::CallingConv::SPIR_KERNEL; case CC_PreserveMost: return llvm::CallingConv::PreserveMost; case CC_PreserveAll: return llvm::CallingConv::PreserveAll; + case CC_Swift: return llvm::CallingConv::Swift; } } @@ -109,7 +111,7 @@ static void appendParameterTypes(const CodeGenTypes &CGT, auto protoParamInfos = FPT->getExtParameterInfos(); paramInfos.reserve(prefix.size() + protoParamInfos.size()); paramInfos.resize(prefix.size()); - paramInfos.append(paramInfos.begin(), paramInfos.end()); + paramInfos.append(protoParamInfos.begin(), protoParamInfos.end()); } // Fast path: unknown target. @@ -590,7 +592,6 @@ CodeGenTypes::arrangeBuiltinFunctionDeclaration(CanQualType resultType, argTypes, FunctionType::ExtInfo(), {}, RequiredArgs::All); } - /// Arrange a call to a C++ method, passing the given arguments. const CGFunctionInfo & CodeGenTypes::arrangeCXXMethodCall(const CallArgList &args, @@ -679,7 +680,11 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType, assert(inserted && "Recursively being processed?"); // Compute ABI information. - getABIInfo().computeInfo(*FI); + if (info.getCC() != CC_Swift) { + getABIInfo().computeInfo(*FI); + } else { + swiftcall::computeABIInfo(CGM, *FI); + } // Loop over all of the computed argument and return value info. If any of // them are direct or extend without a specified coerce type, specify the @@ -918,7 +923,7 @@ static void forConstantArrayExpansion(CodeGenFunction &CGF, } void CodeGenFunction::ExpandTypeFromArgs( - QualType Ty, LValue LV, SmallVectorImpl::iterator &AI) { + QualType Ty, LValue LV, SmallVectorImpl::iterator &AI) { assert(LV.isSimple() && "Unexpected non-simple lvalue during struct expansion."); @@ -1813,10 +1818,13 @@ void CodeGenModule::ConstructAttributeList( getLLVMContext(), llvm::AttributeSet::ReturnIndex, RetAttrs)); } + bool hasUsedSRet = false; + // Attach attributes to sret. 
if (IRFunctionArgs.hasSRetArg()) { llvm::AttrBuilder SRETAttrs; SRETAttrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; if (RetAI.getInReg()) SRETAttrs.addAttribute(llvm::Attribute::InReg); PAL.push_back(llvm::AttributeSet::get( @@ -1920,6 +1928,41 @@ void CodeGenModule::ConstructAttributeList( Attrs.addAttribute(llvm::Attribute::NonNull); } + switch (FI.getExtParameterInfo(ArgNo).getABI()) { + case ParameterABI::Ordinary: + break; + + case ParameterABI::SwiftIndirectResult: { + // Add 'sret' if we haven't already used it for something, but + // only if the result is void. + if (!hasUsedSRet && RetTy->isVoidType()) { + Attrs.addAttribute(llvm::Attribute::StructRet); + hasUsedSRet = true; + } + + // Add 'noalias' in either case. + Attrs.addAttribute(llvm::Attribute::NoAlias); + + // Add 'dereferenceable' and 'alignment'. + auto PTy = ParamType->getPointeeType(); + if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) { + auto info = getContext().getTypeInfoInChars(PTy); + Attrs.addDereferenceableAttr(info.first.getQuantity()); + Attrs.addAttribute(llvm::Attribute::getWithAlignment(getLLVMContext(), + info.second.getQuantity())); + } + break; + } + + case ParameterABI::SwiftErrorResult: + Attrs.addAttribute(llvm::Attribute::SwiftError); + break; + + case ParameterABI::SwiftContext: + Attrs.addAttribute(llvm::Attribute::SwiftSelf); + break; + } + if (Attrs.hasAttributes()) { unsigned FirstIRArg, NumIRArgs; std::tie(FirstIRArg, NumIRArgs) = IRFunctionArgs.getIRArgs(ArgNo); @@ -1985,6 +2028,18 @@ static const NonNullAttr *getNonNullAttr(const Decl *FD, const ParmVarDecl *PVD, return nullptr; } +namespace { + struct CopyBackSwiftError final : EHScopeStack::Cleanup { + Address Temp; + Address Arg; + CopyBackSwiftError(Address temp, Address arg) : Temp(temp), Arg(arg) {} + void Emit(CodeGenFunction &CGF, Flags flags) override { + llvm::Value *errorValue = CGF.Builder.CreateLoad(Temp); + CGF.Builder.CreateStore(errorValue, Arg); + } + }; +} + void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, llvm::Function *Fn, const FunctionArgList &Args) { @@ -2010,7 +2065,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ClangToLLVMArgMapping IRFunctionArgs(CGM.getContext(), FI); // Flattened function arguments. - SmallVector FnArgs; + SmallVector FnArgs; FnArgs.reserve(IRFunctionArgs.totalIRArgs()); for (auto &Arg : Fn->args()) { FnArgs.push_back(&Arg); @@ -2031,7 +2086,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, // Name the struct return parameter. if (IRFunctionArgs.hasSRetArg()) { - auto AI = FnArgs[IRFunctionArgs.getSRetArgNo()]; + auto AI = cast(FnArgs[IRFunctionArgs.getSRetArgNo()]); AI->setName("agg.result"); AI->addAttr(llvm::AttributeSet::get(getLLVMContext(), AI->getArgNo() + 1, llvm::Attribute::NoAlias)); @@ -2119,8 +2174,8 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, ArgI.getCoerceToType() == ConvertType(Ty) && ArgI.getDirectOffset() == 0) { assert(NumIRArgs == 1); - auto AI = FnArgs[FirstIRArg]; - llvm::Value *V = AI; + llvm::Value *V = FnArgs[FirstIRArg]; + auto AI = cast(V); if (const ParmVarDecl *PVD = dyn_cast(Arg)) { if (getNonNullAttr(CurCodeDecl, PVD, PVD->getType(), @@ -2189,6 +2244,25 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, AI->getArgNo() + 1, llvm::Attribute::NoAlias)); + // LLVM expects swifterror parameters to be used in very restricted + // ways. Copy the value into a less-restricted temporary. 
+ if (FI.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + QualType pointeeTy = Ty->getPointeeType(); + assert(pointeeTy->isPointerType()); + Address temp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + Address arg = Address(V, getContext().getTypeAlignInChars(pointeeTy)); + llvm::Value *incomingErrorValue = Builder.CreateLoad(arg); + Builder.CreateStore(incomingErrorValue, temp); + V = temp.getPointer(); + + // Push a cleanup to copy the value back at the end of the function. + // The convention does not guarantee that the value will be written + // back if the function exits with an unwind exception. + EHStack.pushCleanup(NormalCleanup, temp, arg); + } + // Ensure the argument is the correct type. if (V->getType() != ArgI.getCoerceToType()) V = Builder.CreateBitCast(V, ArgI.getCoerceToType()); @@ -3481,6 +3555,9 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } } + Address swiftErrorTemp = Address::invalid(); + Address swiftErrorArg = Address::invalid(); + assert(CallInfo.arg_size() == CallArgs.size() && "Mismatch between function signature & arguments."); unsigned ArgNo = 0; @@ -3587,6 +3664,25 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, else V = Builder.CreateLoad(RV.getAggregateAddress()); + // Implement swifterror by copying into a new swifterror argument. + // We'll write back in the normal path out of the call. + if (CallInfo.getExtParameterInfo(ArgNo).getABI() + == ParameterABI::SwiftErrorResult) { + assert(!swiftErrorTemp.isValid() && "multiple swifterror args"); + + QualType pointeeTy = I->Ty->getPointeeType(); + swiftErrorArg = + Address(V, getContext().getTypeAlignInChars(pointeeTy)); + + swiftErrorTemp = + CreateMemTemp(pointeeTy, getPointerAlign(), "swifterror.temp"); + V = swiftErrorTemp.getPointer(); + cast(V)->setSwiftError(true); + + llvm::Value *errorValue = Builder.CreateLoad(swiftErrorArg); + Builder.CreateStore(errorValue, swiftErrorTemp); + } + // We might have to widen integers, but we should never truncate. if (ArgInfo.getCoerceToType() != V->getType() && V->getType()->isIntegerTy()) @@ -3597,6 +3693,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (FirstIRArg < IRFuncTy->getNumParams() && V->getType() != IRFuncTy->getParamType(FirstIRArg)) V = Builder.CreateBitCast(V, IRFuncTy->getParamType(FirstIRArg)); + IRCallArgs[FirstIRArg] = V; break; } @@ -3656,13 +3753,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } case ABIArgInfo::CoerceAndExpand: { - assert(RV.isAggregate() && - "CoerceAndExpand does not support non-aggregate types yet"); - auto coercionType = ArgInfo.getCoerceAndExpandType(); auto layout = CGM.getDataLayout().getStructLayout(coercionType); - Address addr = RV.getAggregateAddress(); + llvm::Value *tempSize = nullptr; + Address addr = Address::invalid(); + if (RV.isAggregate()) { + addr = RV.getAggregateAddress(); + } else { + assert(RV.isScalar()); // complex should always just be direct + + llvm::Type *scalarType = RV.getScalarVal()->getType(); + auto scalarSize = CGM.getDataLayout().getTypeAllocSize(scalarType); + auto scalarAlign = CGM.getDataLayout().getPrefTypeAlignment(scalarType); + + tempSize = llvm::ConstantInt::get(CGM.Int64Ty, scalarSize); + + // Materialize to a temporary. 
+ addr = CreateTempAlloca(RV.getScalarVal()->getType(), + CharUnits::fromQuantity(std::max(layout->getAlignment(), + scalarAlign))); + EmitLifetimeStart(scalarSize, addr.getPointer()); + + Builder.CreateStore(RV.getScalarVal(), addr); + } + addr = Builder.CreateElementBitCast(addr, coercionType); unsigned IRArgPos = FirstIRArg; @@ -3675,6 +3790,10 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } assert(IRArgPos == FirstIRArg + NumIRArgs); + if (tempSize) { + EmitLifetimeEnd(tempSize, addr.getPointer()); + } + break; } @@ -3853,6 +3972,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, if (!CI->getType()->isVoidTy()) CI->setName("call"); + // Perform the swifterror writeback. + if (swiftErrorTemp.isValid()) { + llvm::Value *errorResult = Builder.CreateLoad(swiftErrorTemp); + Builder.CreateStore(errorResult, swiftErrorArg); + } + // Emit any writebacks immediately. Arguably this should happen // after any return-value munging. if (CallArgs.hasWritebacks()) @@ -3870,15 +3995,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RValue Ret = [&] { switch (RetAI.getKind()) { - case ABIArgInfo::InAlloca: - case ABIArgInfo::Indirect: { - RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); - if (UnusedReturnSize) - EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize), - SRetPtr.getPointer()); - return ret; - } - case ABIArgInfo::CoerceAndExpand: { auto coercionType = RetAI.getCoerceAndExpandType(); auto layout = CGM.getDataLayout().getStructLayout(coercionType); @@ -3886,15 +4002,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Address addr = SRetPtr; addr = Builder.CreateElementBitCast(addr, coercionType); + assert(CI->getType() == RetAI.getUnpaddedCoerceAndExpandType()); + bool requiresExtract = isa(CI->getType()); + unsigned unpaddedIndex = 0; for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) { llvm::Type *eltType = coercionType->getElementType(i); if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue; Address eltAddr = Builder.CreateStructGEP(addr, i, layout); - llvm::Value *elt = Builder.CreateExtractValue(CI, unpaddedIndex++); + llvm::Value *elt = CI; + if (requiresExtract) + elt = Builder.CreateExtractValue(elt, unpaddedIndex++); + else + assert(unpaddedIndex == 0); Builder.CreateStore(elt, eltAddr); } - break; + // FALLTHROUGH + } + + case ABIArgInfo::InAlloca: + case ABIArgInfo::Indirect: { + RValue ret = convertTempToRValue(SRetPtr, RetTy, SourceLocation()); + if (UnusedReturnSize) + EmitLifetimeEnd(llvm::ConstantInt::get(Int64Ty, UnusedReturnSize), + SRetPtr.getPointer()); + return ret; } case ABIArgInfo::Ignore: diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 353f60163172f..2be6dee959062 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -76,6 +76,7 @@ add_clang_library(clangCodeGen ModuleBuilder.cpp ObjectFilePCHContainerOperations.cpp SanitizerMetadata.cpp + SwiftCallingConv.cpp TargetInfo.cpp DEPENDS diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index c4e5ecd9000c3..c21fd1d61f9e2 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -68,7 +68,6 @@ class ObjCMethodDecl; class ObjCImplementationDecl; class ObjCPropertyImplDecl; class TargetInfo; -class TargetCodeGenInfo; class VarDecl; class ObjCForCollectionStmt; class ObjCAtTryStmt; @@ -86,6 +85,7 @@ class BlockByrefHelpers; class 
BlockByrefInfo; class BlockFlags; class BlockFieldFlags; +class TargetCodeGenInfo; /// The kind of evaluation to perform on values of a particular /// type. Basically, is the code in CGExprScalar, CGExprComplex, or @@ -3110,7 +3110,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// /// \param AI - The first function argument of the expansion. void ExpandTypeFromArgs(QualType Ty, LValue Dst, - SmallVectorImpl::iterator &AI); + SmallVectorImpl::iterator &AI); /// ExpandTypeToArgs - Expand an RValue \arg RV, with the LLVM type for \arg /// Ty, into individual arguments on the provided vector \arg IRCallArgs, diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index 8e662c2537032..f76f0ba2d1a1a 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -49,7 +49,6 @@ class IndexedInstrProfReader; } namespace clang { -class TargetCodeGenInfo; class ASTContext; class AtomicType; class FunctionDecl; @@ -93,6 +92,7 @@ class CGCUDARuntime; class BlockFieldFlags; class FunctionArgList; class CoverageMappingModuleGen; +class TargetCodeGenInfo; struct OrderGlobalInits { unsigned int priority; diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index d3ee0aff2c7fb..c77790100a89b 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -31,7 +31,6 @@ class StructType; } namespace clang { -class ABIInfo; class ASTContext; template class CanQual; class CXXConstructorDecl; @@ -51,6 +50,7 @@ class Type; typedef CanQual CanQualType; namespace CodeGen { +class ABIInfo; class CGCXXABI; class CGRecordLayout; class CodeGenModule; diff --git a/clang/lib/CodeGen/SwiftCallingConv.cpp b/clang/lib/CodeGen/SwiftCallingConv.cpp new file mode 100644 index 0000000000000..6fae19f277939 --- /dev/null +++ b/clang/lib/CodeGen/SwiftCallingConv.cpp @@ -0,0 +1,830 @@ +//===--- SwiftCallingConv.cpp - Lowering for the Swift calling convention -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the abstract lowering for the Swift calling convention. +// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/SwiftCallingConv.h" +#include "clang/Basic/TargetInfo.h" +#include "CodeGenModule.h" +#include "TargetInfo.h" + +using namespace clang; +using namespace CodeGen; +using namespace swiftcall; + +static const SwiftABIInfo &getSwiftABIInfo(CodeGenModule &CGM) { + return cast(CGM.getTargetCodeGenInfo().getABIInfo()); +} + +static bool isPowerOf2(unsigned n) { + return n == (n & -n); +} + +/// Given two types with the same size, try to find a common type. +static llvm::Type *getCommonType(llvm::Type *first, llvm::Type *second) { + assert(first != second); + + // Allow pointers to merge with integers, but prefer the integer type. + if (first->isIntegerTy()) { + if (second->isPointerTy()) return first; + } else if (first->isPointerTy()) { + if (second->isIntegerTy()) return second; + if (second->isPointerTy()) return first; + + // Allow two vectors to be merged (given that they have the same size). + // This assumes that we never have two different vector register sets. 
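To make the merge rules concrete, here are a few illustrative results (an informal aside that simply follows the rules above and the vector branch below; it is not part of the patch):

    //   getCommonType(i64,  i8*)              -> i64     (integers are preferred over pointers)
    //   getCommonType(i8**, i32*)             -> i8**    (two pointers: the first one wins)
    //   getCommonType(<2 x i64>, <2 x double>) -> nullptr (i64 and double do not merge, so the
    //                                             caller falls back to an opaque entry)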
+ } else if (auto firstVecTy = dyn_cast(first)) { + if (auto secondVecTy = dyn_cast(second)) { + if (auto commonTy = getCommonType(firstVecTy->getElementType(), + secondVecTy->getElementType())) { + return (commonTy == firstVecTy->getElementType() ? first : second); + } + } + } + + return nullptr; +} + +static CharUnits getTypeStoreSize(CodeGenModule &CGM, llvm::Type *type) { + return CharUnits::fromQuantity(CGM.getDataLayout().getTypeStoreSize(type)); +} + +void SwiftAggLowering::addTypedData(QualType type, CharUnits begin) { + // Deal with various aggregate types as special cases: + + // Record types. + if (auto recType = type->getAs()) { + addTypedData(recType->getDecl(), begin); + + // Array types. + } else if (type->isArrayType()) { + // Incomplete array types (flexible array members?) don't provide + // data to lay out, and the other cases shouldn't be possible. + auto arrayType = CGM.getContext().getAsConstantArrayType(type); + if (!arrayType) return; + + QualType eltType = arrayType->getElementType(); + auto eltSize = CGM.getContext().getTypeSizeInChars(eltType); + for (uint64_t i = 0, e = arrayType->getSize().getZExtValue(); i != e; ++i) { + addTypedData(eltType, begin + i * eltSize); + } + + // Complex types. + } else if (auto complexType = type->getAs()) { + auto eltType = complexType->getElementType(); + auto eltSize = CGM.getContext().getTypeSizeInChars(eltType); + auto eltLLVMType = CGM.getTypes().ConvertType(eltType); + addTypedData(eltLLVMType, begin, begin + eltSize); + addTypedData(eltLLVMType, begin + eltSize, begin + 2 * eltSize); + + // Member pointer types. + } else if (type->getAs()) { + // Just add it all as opaque. + addOpaqueData(begin, begin + CGM.getContext().getTypeSizeInChars(type)); + + // Everything else is scalar and should not convert as an LLVM aggregate. + } else { + // We intentionally convert as !ForMem because we want to preserve + // that a type was an i1. + auto llvmType = CGM.getTypes().ConvertType(type); + addTypedData(llvmType, begin); + } +} + +void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin) { + addTypedData(record, begin, CGM.getContext().getASTRecordLayout(record)); +} + +void SwiftAggLowering::addTypedData(const RecordDecl *record, CharUnits begin, + const ASTRecordLayout &layout) { + // Unions are a special case. + if (record->isUnion()) { + for (auto field : record->fields()) { + if (field->isBitField()) { + addBitFieldData(field, begin, 0); + } else { + addTypedData(field->getType(), begin); + } + } + return; + } + + // Note that correctness does not rely on us adding things in + // their actual order of layout; it's just somewhat more efficient + // for the builder. + + // With that in mind, add "early" C++ data. + auto cxxRecord = dyn_cast(record); + if (cxxRecord) { + // - a v-table pointer, if the class adds its own + if (layout.hasOwnVFPtr()) { + addTypedData(CGM.Int8PtrTy, begin); + } + + // - non-virtual bases + for (auto &baseSpecifier : cxxRecord->bases()) { + if (baseSpecifier.isVirtual()) continue; + + auto baseRecord = baseSpecifier.getType()->getAsCXXRecordDecl(); + addTypedData(baseRecord, begin + layout.getBaseClassOffset(baseRecord)); + } + + // - a vbptr if the class adds its own + if (layout.hasOwnVBPtr()) { + addTypedData(CGM.Int8PtrTy, begin + layout.getVBPtrOffset()); + } + } + + // Add fields. 
+ for (auto field : record->fields()) { + auto fieldOffsetInBits = layout.getFieldOffset(field->getFieldIndex()); + if (field->isBitField()) { + addBitFieldData(field, begin, fieldOffsetInBits); + } else { + addTypedData(field->getType(), + begin + CGM.getContext().toCharUnitsFromBits(fieldOffsetInBits)); + } + } + + // Add "late" C++ data: + if (cxxRecord) { + // - virtual bases + for (auto &vbaseSpecifier : cxxRecord->vbases()) { + auto baseRecord = vbaseSpecifier.getType()->getAsCXXRecordDecl(); + addTypedData(baseRecord, begin + layout.getVBaseClassOffset(baseRecord)); + } + } +} + +void SwiftAggLowering::addBitFieldData(const FieldDecl *bitfield, + CharUnits recordBegin, + uint64_t bitfieldBitBegin) { + assert(bitfield->isBitField()); + auto &ctx = CGM.getContext(); + auto width = bitfield->getBitWidthValue(ctx); + + // We can ignore zero-width bit-fields. + if (width == 0) return; + + // toCharUnitsFromBits rounds down. + CharUnits bitfieldByteBegin = ctx.toCharUnitsFromBits(bitfieldBitBegin); + + // Find the offset of the last byte that is partially occupied by the + // bit-field; since we otherwise expect exclusive ends, the end is the + // next byte. + uint64_t bitfieldBitLast = bitfieldBitBegin + width - 1; + CharUnits bitfieldByteEnd = + ctx.toCharUnitsFromBits(bitfieldBitLast) + CharUnits::One(); + addOpaqueData(recordBegin + bitfieldByteBegin, + recordBegin + bitfieldByteEnd); +} + +void SwiftAggLowering::addTypedData(llvm::Type *type, CharUnits begin) { + assert(type && "didn't provide type for typed data"); + addTypedData(type, begin, begin + getTypeStoreSize(CGM, type)); +} + +void SwiftAggLowering::addTypedData(llvm::Type *type, + CharUnits begin, CharUnits end) { + assert(type && "didn't provide type for typed data"); + assert(getTypeStoreSize(CGM, type) == end - begin); + + // Legalize vector types. + if (auto vecTy = dyn_cast(type)) { + SmallVector componentTys; + legalizeVectorType(CGM, end - begin, vecTy, componentTys); + assert(componentTys.size() >= 1); + + // Walk the initial components. + for (size_t i = 0, e = componentTys.size(); i != e - 1; ++i) { + llvm::Type *componentTy = componentTys[i]; + auto componentSize = getTypeStoreSize(CGM, componentTy); + assert(componentSize < end - begin); + addLegalTypedData(componentTy, begin, begin + componentSize); + begin += componentSize; + } + + return addLegalTypedData(componentTys.back(), begin, end); + } + + // Legalize integer types. + if (auto intTy = dyn_cast(type)) { + if (!isLegalIntegerType(CGM, intTy)) + return addOpaqueData(begin, end); + } + + // All other types should be legal. + return addLegalTypedData(type, begin, end); +} + +void SwiftAggLowering::addLegalTypedData(llvm::Type *type, + CharUnits begin, CharUnits end) { + // Require the type to be naturally aligned. + if (!begin.isZero() && !begin.isMultipleOf(getNaturalAlignment(CGM, type))) { + + // Try splitting vector types. 
+ if (auto vecTy = dyn_cast(type)) { + auto split = splitLegalVectorType(CGM, end - begin, vecTy); + auto eltTy = split.first; + auto numElts = split.second; + + auto eltSize = (end - begin) / numElts; + assert(eltSize == getTypeStoreSize(CGM, eltTy)); + for (size_t i = 0, e = numElts; i != e; ++i) { + addLegalTypedData(type, begin, begin + eltSize); + begin += eltSize; + } + assert(begin == end); + return; + } + + return addOpaqueData(begin, end); + } + + addEntry(type, begin, end); +} + +void SwiftAggLowering::addEntry(llvm::Type *type, + CharUnits begin, CharUnits end) { + assert(!type || + (!isa(type) && !isa(type)) && + "cannot add aggregate-typed data"); + assert(!type || begin.isMultipleOf(getNaturalAlignment(CGM, type))); + + // Fast path: we can just add entries to the end. + if (Entries.empty() || Entries.back().End <= begin) { + Entries.push_back({begin, end, type}); + return; + } + + // Find the first existing entry that ends after the start of the new data. + // TODO: do a binary search if Entries is big enough for it to matter. + size_t index = Entries.size() - 1; + while (index != 0) { + if (Entries[index - 1].End <= begin) break; + --index; + } + + // The entry ends after the start of the new data. + // If the entry starts after the end of the new data, there's no conflict. + if (Entries[index].Begin >= end) { + // This insertion is potentially O(n), but the way we generally build + // these layouts makes that unlikely to matter: we'd need a union of + // several very large types. + Entries.insert(Entries.begin() + index, {begin, end, type}); + return; + } + + // Otherwise, the ranges overlap. The new range might also overlap + // with later ranges. +restartAfterSplit: + + // Simplest case: an exact overlap. + if (Entries[index].Begin == begin && Entries[index].End == end) { + // If the types match exactly, great. + if (Entries[index].Type == type) return; + + // If either type is opaque, make the entry opaque and return. + if (Entries[index].Type == nullptr) { + return; + } else if (type == nullptr) { + Entries[index].Type = nullptr; + return; + } + + // If they disagree in an ABI-agnostic way, just resolve the conflict + // arbitrarily. + if (auto entryType = getCommonType(Entries[index].Type, type)) { + Entries[index].Type = entryType; + return; + } + + // Otherwise, make the entry opaque. + Entries[index].Type = nullptr; + return; + } + + // Okay, we have an overlapping conflict of some sort. + + // If we have a vector type, split it. + if (auto vecTy = dyn_cast_or_null(type)) { + auto eltTy = vecTy->getElementType(); + CharUnits eltSize = (end - begin) / vecTy->getNumElements(); + assert(eltSize == getTypeStoreSize(CGM, eltTy)); + for (unsigned i = 0, e = vecTy->getNumElements(); i != e; ++i) { + addEntry(eltTy, begin, begin + eltSize); + begin += eltSize; + } + assert(begin == end); + return; + } + + // If the entry is a vector type, split it and try again. + if (Entries[index].Type && Entries[index].Type->isVectorTy()) { + splitVectorEntry(index); + goto restartAfterSplit; + } + + // Okay, we have no choice but to make the existing entry opaque. + + Entries[index].Type = nullptr; + + // Stretch the start of the entry to the beginning of the range. + if (begin < Entries[index].Begin) { + Entries[index].Begin = begin; + assert(index == 0 || begin >= Entries[index - 1].End); + } + + // Stretch the end of the entry to the end of the range; but if we run + // into the start of the next entry, just leave the range there and repeat. 
+ while (end > Entries[index].End) { + assert(Entries[index].Type == nullptr); + + // If the range doesn't overlap the next entry, we're done. + if (index == Entries.size() - 1 || end <= Entries[index + 1].Begin) { + Entries[index].End = end; + break; + } + + // Otherwise, stretch to the start of the next entry. + Entries[index].End = Entries[index + 1].Begin; + + // Continue with the next entry. + index++; + + // This entry needs to be made opaque if it is not already. + if (Entries[index].Type == nullptr) + continue; + + // Split vector entries unless we completely subsume them. + if (Entries[index].Type->isVectorTy() && + end < Entries[index].End) { + splitVectorEntry(index); + } + + // Make the entry opaque. + Entries[index].Type = nullptr; + } +} + +/// Replace the entry of vector type at offset 'index' with a sequence +/// of its component vectors. +void SwiftAggLowering::splitVectorEntry(unsigned index) { + auto vecTy = cast(Entries[index].Type); + auto split = splitLegalVectorType(CGM, Entries[index].getWidth(), vecTy); + + auto eltTy = split.first; + CharUnits eltSize = getTypeStoreSize(CGM, eltTy); + auto numElts = split.second; + Entries.insert(&Entries[index + 1], numElts - 1, StorageEntry()); + + CharUnits begin = Entries[index].Begin; + for (unsigned i = 0; i != numElts; ++i) { + Entries[index].Type = eltTy; + Entries[index].Begin = begin; + Entries[index].End = begin + eltSize; + begin += eltSize; + } +} + +/// Given a power-of-two unit size, return the offset of the aligned unit +/// of that size which contains the given offset. +/// +/// In other words, round down to the nearest multiple of the unit size. +static CharUnits getOffsetAtStartOfUnit(CharUnits offset, CharUnits unitSize) { + assert(isPowerOf2(unitSize.getQuantity())); + auto unitMask = ~(unitSize.getQuantity() - 1); + return CharUnits::fromQuantity(offset.getQuantity() & unitMask); +} + +static bool areBytesInSameUnit(CharUnits first, CharUnits second, + CharUnits chunkSize) { + return getOffsetAtStartOfUnit(first, chunkSize) + == getOffsetAtStartOfUnit(second, chunkSize); +} + +void SwiftAggLowering::finish() { + if (Entries.empty()) { + Finished = true; + return; + } + + // We logically split the layout down into a series of chunks of this size, + // which is generally the size of a pointer. + const CharUnits chunkSize = getMaximumVoluntaryIntegerSize(CGM); + + // First pass: if two entries share a chunk, make them both opaque + // and stretch one to meet the next. + bool hasOpaqueEntries = (Entries[0].Type == nullptr); + for (size_t i = 1, e = Entries.size(); i != e; ++i) { + if (areBytesInSameUnit(Entries[i - 1].End - CharUnits::One(), + Entries[i].Begin, chunkSize)) { + Entries[i - 1].Type = nullptr; + Entries[i].Type = nullptr; + Entries[i - 1].End = Entries[i].Begin; + hasOpaqueEntries = true; + + } else if (Entries[i].Type == nullptr) { + hasOpaqueEntries = true; + } + } + + // The rest of the algorithm leaves non-opaque entries alone, so if we + // have no opaque entries, we're done. + if (!hasOpaqueEntries) { + Finished = true; + return; + } + + // Okay, move the entries to a temporary and rebuild Entries. + auto orig = std::move(Entries); + assert(Entries.empty()); + + for (size_t i = 0, e = orig.size(); i != e; ++i) { + // Just copy over non-opaque entries. + if (orig[i].Type != nullptr) { + Entries.push_back(orig[i]); + continue; + } + + // Scan forward to determine the full extent of the next opaque range. 
+ // We know from the first pass that only contiguous ranges will overlap + // the same aligned chunk. + auto begin = orig[i].Begin; + auto end = orig[i].End; + while (i + 1 != e && + orig[i + 1].Type == nullptr && + end == orig[i + 1].Begin) { + end = orig[i + 1].End; + i++; + } + + // Add an entry per intersected chunk. + do { + // Find the smallest aligned storage unit in the maximal aligned + // storage unit containing 'begin' that contains all the bytes in + // the intersection between the range and this chunk. + CharUnits localBegin = begin; + CharUnits chunkBegin = getOffsetAtStartOfUnit(localBegin, chunkSize); + CharUnits chunkEnd = chunkBegin + chunkSize; + CharUnits localEnd = std::min(end, chunkEnd); + + // Just do a simple loop over ever-increasing unit sizes. + CharUnits unitSize = CharUnits::One(); + CharUnits unitBegin, unitEnd; + for (; ; unitSize *= 2) { + assert(unitSize <= chunkSize); + unitBegin = getOffsetAtStartOfUnit(localBegin, unitSize); + unitEnd = unitBegin + unitSize; + if (unitEnd >= localEnd) break; + } + + // Add an entry for this unit. + auto entryTy = + llvm::IntegerType::get(CGM.getLLVMContext(), + CGM.getContext().toBits(unitSize)); + Entries.push_back({unitBegin, unitEnd, entryTy}); + + // The next chunk starts where this chunk left off. + begin = localEnd; + } while (begin != end); + } + + // Okay, finally finished. + Finished = true; +} + +void SwiftAggLowering::enumerateComponents(EnumerationCallback callback) const { + assert(Finished && "haven't yet finished lowering"); + + for (auto &entry : Entries) { + callback(entry.Begin, entry.Type); + } +} + +std::pair +SwiftAggLowering::getCoerceAndExpandTypes() const { + assert(Finished && "haven't yet finished lowering"); + + auto &ctx = CGM.getLLVMContext(); + + if (Entries.empty()) { + auto type = llvm::StructType::get(ctx); + return { type, type }; + } + + SmallVector elts; + CharUnits lastEnd = CharUnits::Zero(); + bool hasPadding = false; + bool packed = false; + for (auto &entry : Entries) { + if (entry.Begin != lastEnd) { + auto paddingSize = entry.Begin - lastEnd; + assert(!paddingSize.isNegative()); + + auto padding = llvm::ArrayType::get(llvm::Type::getInt8Ty(ctx), + paddingSize.getQuantity()); + elts.push_back(padding); + hasPadding = true; + } + + if (!packed && !entry.Begin.isMultipleOf( + CharUnits::fromQuantity( + CGM.getDataLayout().getABITypeAlignment(entry.Type)))) + packed = true; + + elts.push_back(entry.Type); + lastEnd = entry.End; + } + + // We don't need to adjust 'packed' to deal with possible tail padding + // because we never do that kind of access through the coercion type. + auto coercionType = llvm::StructType::get(ctx, elts, packed); + + llvm::Type *unpaddedType = coercionType; + if (hasPadding) { + elts.clear(); + for (auto &entry : Entries) { + elts.push_back(entry.Type); + } + if (elts.size() == 1) { + unpaddedType = elts[0]; + } else { + unpaddedType = llvm::StructType::get(ctx, elts, /*packed*/ false); + } + } else if (Entries.size() == 1) { + unpaddedType = Entries[0].Type; + } + + return { coercionType, unpaddedType }; +} + +bool SwiftAggLowering::shouldPassIndirectly(bool asReturnValue) const { + assert(Finished && "haven't yet finished lowering"); + + // Empty types don't need to be passed indirectly. + if (Entries.empty()) return false; + + CharUnits totalSize = Entries.back().End; + + // Avoid copying the array of types when there's just a single element. 
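A brief end-to-end illustration of the lowering defined above (an informal walkthrough that follows the code in this patch; not additional patch content):

    // union { int i; float f; }
    //   addTypedData records i32 over [0,4) and then float over the same [0,4);
    //   the exact overlap has no common type (getCommonType(i32, float) is null),
    //   so addEntry makes the entry opaque, and finish() re-emits the opaque
    //   range as a single i32. getCoerceAndExpandTypes() then yields the pair
    //   ({ i32 }, i32), i.e. the value is ultimately passed as one i32.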
+ if (Entries.size() == 1) { + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + Entries.back().Type, + asReturnValue); + } + + SmallVector componentTys; + componentTys.reserve(Entries.size()); + for (auto &entry : Entries) { + componentTys.push_back(entry.Type); + } + return getSwiftABIInfo(CGM).shouldPassIndirectlyForSwift(totalSize, + componentTys, + asReturnValue); +} + +CharUnits swiftcall::getMaximumVoluntaryIntegerSize(CodeGenModule &CGM) { + // Currently always the size of an ordinary pointer. + return CGM.getContext().toCharUnitsFromBits( + CGM.getContext().getTargetInfo().getPointerWidth(0)); +} + +CharUnits swiftcall::getNaturalAlignment(CodeGenModule &CGM, llvm::Type *type) { + // For Swift's purposes, this is always just the store size of the type + // rounded up to a power of 2. + auto size = (unsigned long long) getTypeStoreSize(CGM, type).getQuantity(); + if (!isPowerOf2(size)) { + size = 1U << (llvm::findLastSet(size, llvm::ZB_Undefined) + 1); + } + assert(size >= CGM.getDataLayout().getABITypeAlignment(type)); + return CharUnits::fromQuantity(size); +} + +bool swiftcall::isLegalIntegerType(CodeGenModule &CGM, + llvm::IntegerType *intTy) { + auto size = intTy->getBitWidth(); + switch (size) { + case 1: + case 8: + case 16: + case 32: + case 64: + // Just assume that the above are always legal. + return true; + + case 128: + return CGM.getContext().getTargetInfo().hasInt128Type(); + + default: + return false; + } +} + +bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy) { + return isLegalVectorType(CGM, vectorSize, vectorTy->getElementType(), + vectorTy->getNumElements()); +} + +bool swiftcall::isLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::Type *eltTy, unsigned numElts) { + assert(numElts > 1 && "illegal vector length"); + return getSwiftABIInfo(CGM) + .isLegalVectorTypeForSwift(vectorSize, eltTy, numElts); +} + +std::pair +swiftcall::splitLegalVectorType(CodeGenModule &CGM, CharUnits vectorSize, + llvm::VectorType *vectorTy) { + auto numElts = vectorTy->getNumElements(); + auto eltTy = vectorTy->getElementType(); + + // Try to split the vector type in half. + if (numElts >= 4 && isPowerOf2(numElts)) { + if (isLegalVectorType(CGM, vectorSize / 2, eltTy, numElts / 2)) + return {llvm::VectorType::get(eltTy, numElts / 2), 2}; + } + + return {eltTy, numElts}; +} + +void swiftcall::legalizeVectorType(CodeGenModule &CGM, CharUnits origVectorSize, + llvm::VectorType *origVectorTy, + llvm::SmallVectorImpl &components) { + // If it's already a legal vector type, use it. + if (isLegalVectorType(CGM, origVectorSize, origVectorTy)) { + components.push_back(origVectorTy); + return; + } + + // Try to split the vector into legal subvectors. + auto numElts = origVectorTy->getNumElements(); + auto eltTy = origVectorTy->getElementType(); + assert(numElts != 1); + + // The largest size that we're still considering making subvectors of. + // Always a power of 2. + unsigned logCandidateNumElts = llvm::findLastSet(numElts, llvm::ZB_Undefined); + unsigned candidateNumElts = 1U << logCandidateNumElts; + assert(candidateNumElts <= numElts && candidateNumElts * 2 > numElts); + + // Minor optimization: don't check the legality of this exact size twice. 
+ if (candidateNumElts == numElts) { + logCandidateNumElts--; + candidateNumElts >>= 1; + } + + CharUnits eltSize = (origVectorSize / numElts); + CharUnits candidateSize = eltSize * candidateNumElts; + + // The sensibility of this algorithm relies on the fact that we never + // have a legal non-power-of-2 vector size without having the power of 2 + // also be legal. + while (logCandidateNumElts > 0) { + assert(candidateNumElts == 1U << logCandidateNumElts); + assert(candidateNumElts <= numElts); + assert(candidateSize == eltSize * candidateNumElts); + + // Skip illegal vector sizes. + if (!isLegalVectorType(CGM, candidateSize, eltTy, candidateNumElts)) { + logCandidateNumElts--; + candidateNumElts /= 2; + candidateSize /= 2; + continue; + } + + // Add the right number of vectors of this size. + auto numVecs = numElts >> logCandidateNumElts; + components.append(numVecs, llvm::VectorType::get(eltTy, candidateNumElts)); + numElts -= (numVecs << logCandidateNumElts); + + if (numElts == 0) return; + + // It's possible that the number of elements remaining will be legal. + // This can happen with e.g. <7 x float> when <3 x float> is legal. + // This only needs to be separately checked if it's not a power of 2. + if (numElts > 2 && !isPowerOf2(numElts) && + isLegalVectorType(CGM, eltSize * numElts, eltTy, numElts)) { + components.push_back(llvm::VectorType::get(eltTy, numElts)); + return; + } + + // Bring vecSize down to something no larger than numElts. + do { + logCandidateNumElts--; + candidateNumElts /= 2; + candidateSize /= 2; + } while (candidateNumElts > numElts); + } + + // Otherwise, just append a bunch of individual elements. + components.append(numElts, eltTy); +} + +bool swiftcall::shouldPassCXXRecordIndirectly(CodeGenModule &CGM, + const CXXRecordDecl *record) { + // Following a recommendation from Richard Smith, pass a C++ type + // indirectly only if the destructor is non-trivial or *all* of the + // copy/move constructors are deleted or non-trivial. + + if (record->hasNonTrivialDestructor()) + return true; + + // It would be nice if this were summarized on the CXXRecordDecl. + for (auto ctor : record->ctors()) { + if (ctor->isCopyOrMoveConstructor() && !ctor->isDeleted() && + ctor->isTrivial()) { + return false; + } + } + + return true; +} + +static ABIArgInfo classifyExpandedType(SwiftAggLowering &lowering, + bool forReturn, + CharUnits alignmentForIndirect) { + if (lowering.empty()) { + return ABIArgInfo::getIgnore(); + } else if (lowering.shouldPassIndirectly(forReturn)) { + return ABIArgInfo::getIndirect(alignmentForIndirect, /*byval*/ false); + } else { + auto types = lowering.getCoerceAndExpandTypes(); + return ABIArgInfo::getCoerceAndExpand(types.first, types.second); + } +} + +static ABIArgInfo classifyType(CodeGenModule &CGM, CanQualType type, + bool forReturn) { + if (auto recordType = dyn_cast(type)) { + auto record = recordType->getDecl(); + auto &layout = CGM.getContext().getASTRecordLayout(record); + + if (auto cxxRecord = dyn_cast(record)) { + if (shouldPassCXXRecordIndirectly(CGM, cxxRecord)) + return ABIArgInfo::getIndirect(layout.getAlignment(), /*byval*/ false); + } + + SwiftAggLowering lowering(CGM); + lowering.addTypedData(recordType->getDecl(), CharUnits::Zero(), layout); + lowering.finish(); + + return classifyExpandedType(lowering, forReturn, layout.getAlignment()); + } + + // Just assume that all of our target ABIs can support returning at least + // two integer or floating-point values. + if (isa(type)) { + return (forReturn ? 
ABIArgInfo::getDirect() : ABIArgInfo::getExpand()); + } + + // Vector types may need to be legalized. + if (isa(type)) { + SwiftAggLowering lowering(CGM); + lowering.addTypedData(type, CharUnits::Zero()); + lowering.finish(); + + CharUnits alignment = CGM.getContext().getTypeAlignInChars(type); + return classifyExpandedType(lowering, forReturn, alignment); + } + + // Member pointer types need to be expanded, but it's a simple form of + // expansion that 'Direct' can handle. Note that CanBeFlattened should be + // true for this to work. + + // 'void' needs to be ignored. + if (type->isVoidType()) { + return ABIArgInfo::getIgnore(); + } + + // Everything else can be passed directly. + return ABIArgInfo::getDirect(); +} + +ABIArgInfo swiftcall::classifyReturnType(CodeGenModule &CGM, CanQualType type) { + return classifyType(CGM, type, /*forReturn*/ true); +} + +ABIArgInfo swiftcall::classifyArgumentType(CodeGenModule &CGM, + CanQualType type) { + return classifyType(CGM, type, /*forReturn*/ false); +} + +void swiftcall::computeABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { + auto &retInfo = FI.getReturnInfo(); + retInfo = classifyReturnType(CGM, FI.getReturnType()); + + for (unsigned i = 0, e = FI.arg_size(); i != e; ++i) { + auto &argInfo = FI.arg_begin()[i]; + argInfo.info = classifyArgumentType(CGM, argInfo.type); + } +} \ No newline at end of file diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 8c457fcd0972c..b9a7d315891ab 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -19,6 +19,7 @@ #include "CodeGenFunction.h" #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" +#include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" @@ -68,6 +69,46 @@ Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr, ABIInfo::~ABIInfo() {} +/// Does the given lowering require more than the given number of +/// registers when expanded? +/// +/// This is intended to be the basis of a reasonable basic implementation +/// of should{Pass,Return}IndirectlyForSwift. +/// +/// For most targets, a limit of four total registers is reasonable; this +/// limits the amount of code required in order to move around the value +/// in case it wasn't produced immediately prior to the call by the caller +/// (or wasn't produced in exactly the right registers) or isn't used +/// immediately within the callee. But some targets may need to further +/// limit the register count due to an inability to support that many +/// return registers. +static bool occupiesMoreThan(CodeGenTypes &cgt, + ArrayRef scalarTypes, + unsigned maxAllRegisters) { + unsigned intCount = 0, fpCount = 0; + for (llvm::Type *type : scalarTypes) { + if (type->isPointerTy()) { + intCount++; + } else if (auto intTy = dyn_cast(type)) { + auto ptrWidth = cgt.getTarget().getPointerWidth(0); + intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth; + } else { + assert(type->isVectorTy() || type->isFloatingPointTy()); + fpCount++; + } + } + + return (intCount + fpCount > maxAllRegisters); +} + +bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + // The default implementation of this assumes that the target guarantees + // 128-bit SIMD support but nothing more. 
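Concretely, the size check below accepts only vectors larger than 8 bytes and no larger than 16 bytes. Some illustrative outcomes under this default (an aside, not patch content; targets that override isLegalVectorTypeForSwift may accept a different set):

    //   <4 x float>  (16 bytes) -> legal
    //   <2 x double> (16 bytes) -> legal
    //   <2 x float>  ( 8 bytes) -> not legal; legalizeVectorType scalarizes it into two floats
    //   <8 x float>  (32 bytes) -> not legal; legalizeVectorType splits it into two <4 x float>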
+ return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16); +} + static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI) { const CXXRecordDecl *RD = dyn_cast(RT->getDecl()); @@ -866,7 +907,7 @@ struct CCState { }; /// X86_32ABIInfo - The X86-32 ABI information. -class X86_32ABIInfo : public ABIInfo { +class X86_32ABIInfo : public SwiftABIInfo { enum Class { Integer, Float @@ -935,12 +976,22 @@ class X86_32ABIInfo : public ABIInfo { X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, unsigned NumRegisterParameters, bool SoftFloatABI) - : ABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), + : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), DefaultNumRegisterParameters(NumRegisterParameters) {} + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef scalars, + bool asReturnValue) const override { + // LLVM's x86-32 lowering currently only assigns up to three + // integer registers and three fp registers. Oddly, it'll use up to + // four vector registers for vectors, but those can overlap with the + // scalar registers. + return occupiesMoreThan(CGT, scalars, /*total*/ 3); + } }; class X86_32TargetCodeGenInfo : public TargetCodeGenInfo { @@ -1758,7 +1809,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { } /// X86_64ABIInfo - The X86_64 ABI information. -class X86_64ABIInfo : public ABIInfo { +class X86_64ABIInfo : public SwiftABIInfo { enum Class { Integer = 0, SSE, @@ -1880,7 +1931,7 @@ class X86_64ABIInfo : public ABIInfo { public: X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : - ABIInfo(CGT), AVXLevel(AVXLevel), + SwiftABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } @@ -1907,6 +1958,12 @@ class X86_64ABIInfo : public ABIInfo { bool has64BitPointers() const { return Has64BitPointers; } + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; /// WinX86_64ABIInfo - The Windows X86_64 ABI information. @@ -4338,7 +4395,7 @@ PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, namespace { -class AArch64ABIInfo : public ABIInfo { +class AArch64ABIInfo : public SwiftABIInfo { public: enum ABIKind { AAPCS = 0, @@ -4349,7 +4406,8 @@ class AArch64ABIInfo : public ABIInfo { ABIKind Kind; public: - AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) : ABIInfo(CGT), Kind(Kind) {} + AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind) + : SwiftABIInfo(CGT), Kind(Kind) {} private: ABIKind getABIKind() const { return Kind; } @@ -4382,6 +4440,12 @@ class AArch64ABIInfo : public ABIInfo { return isDarwinPCS() ? 
EmitDarwinVAArg(VAListAddr, Ty, CGF) : EmitAAPCSVAArg(VAListAddr, Ty, CGF); } + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4856,7 +4920,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty, namespace { -class ARMABIInfo : public ABIInfo { +class ARMABIInfo : public SwiftABIInfo { public: enum ABIKind { APCS = 0, @@ -4869,7 +4933,8 @@ class ARMABIInfo : public ABIInfo { ABIKind Kind; public: - ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) { + ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) + : SwiftABIInfo(CGT), Kind(_Kind) { setCCs(); } @@ -4915,6 +4980,12 @@ class ARMABIInfo : public ABIInfo { llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); + + bool shouldPassIndirectlyForSwift(CharUnits totalSize, + ArrayRef scalars, + bool asReturnValue) const override { + return occupiesMoreThan(CGT, scalars, /*total*/ 4); + } }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h index 87b4704986238..71f6b0a4c5c5d 100644 --- a/clang/lib/CodeGen/TargetInfo.h +++ b/clang/lib/CodeGen/TargetInfo.h @@ -29,15 +29,14 @@ class Value; } namespace clang { -class ABIInfo; class Decl; namespace CodeGen { +class ABIInfo; class CallArgList; class CodeGenModule; class CodeGenFunction; class CGFunctionInfo; -} /// TargetCodeGenInfo - This class organizes various target-specific /// codegeneration issues, like target-specific attributes, builtins and so @@ -219,6 +218,8 @@ class TargetCodeGenInfo { llvm::StringRef Value, llvm::SmallString<32> &Opt) const {} }; + +} // namespace CodeGen } // namespace clang #endif // LLVM_CLANG_LIB_CODEGEN_TARGETINFO_H diff --git a/clang/test/CodeGen/arm-swiftcall.c b/clang/test/CodeGen/arm-swiftcall.c new file mode 100644 index 0000000000000..d246559413556 --- /dev/null +++ b/clang/test/CodeGen/arm-swiftcall.c @@ -0,0 +1,496 @@ +// RUN: %clang_cc1 -triple armv7-apple-darwin9 -emit-llvm -o - %s | FileCheck %s + +// This isn't really testing anything ARM-specific; it's just a convenient +// 32-bit platform. + +#define SWIFTCALL __attribute__((swiftcall)) +#define OUT __attribute__((swift_indirect_result)) +#define ERROR __attribute__((swift_error_result)) +#define CONTEXT __attribute__((swift_context)) + +/*****************************************************************************/ +/****************************** PARAMETER ABIS *******************************/ +/*****************************************************************************/ + +SWIFTCALL void indirect_result_1(OUT int *arg0, OUT float *arg1) {} +// CHECK-LABEL: define {{.*}} void @indirect_result_1(i32* noalias sret align 4 dereferenceable(4){{.*}}, float* noalias align 4 dereferenceable(4){{.*}}) + +// TODO: maybe this shouldn't suppress sret. 
+SWIFTCALL int indirect_result_2(OUT int *arg0, OUT float *arg1) { __builtin_unreachable(); } +// CHECK-LABEL: define {{.*}} i32 @indirect_result_2(i32* noalias align 4 dereferenceable(4){{.*}}, float* noalias align 4 dereferenceable(4){{.*}}) + +typedef struct { char array[1024]; } struct_reallybig; +SWIFTCALL struct_reallybig indirect_result_3(OUT int *arg0, OUT float *arg1) { __builtin_unreachable(); } +// CHECK-LABEL: define {{.*}} void @indirect_result_3({{.*}}* noalias sret {{.*}}, i32* noalias align 4 dereferenceable(4){{.*}}, float* noalias align 4 dereferenceable(4){{.*}}) + +SWIFTCALL void context_1(CONTEXT void *self) {} +// CHECK-LABEL: define {{.*}} void @context_1(i8* swiftself + +SWIFTCALL void context_2(void *arg0, CONTEXT void *self) {} +// CHECK-LABEL: define {{.*}} void @context_2(i8*{{.*}}, i8* swiftself + +SWIFTCALL void context_error_1(CONTEXT int *self, ERROR float **error) {} +// CHECK-LABEL: define {{.*}} void @context_error_1(i32* swiftself{{.*}}, float** swifterror) +// CHECK: [[TEMP:%.*]] = alloca float*, align 4 +// CHECK: [[T0:%.*]] = load float*, float** [[ERRORARG:%.*]], align 4 +// CHECK: store float* [[T0]], float** [[TEMP]], align 4 +// CHECK: [[T0:%.*]] = load float*, float** [[TEMP]], align 4 +// CHECK: store float* [[T0]], float** [[ERRORARG]], align 4 +void test_context_error_1() { + int x; + float *error; + context_error_1(&x, &error); +} +// CHECK-LABEL: define void @test_context_error_1() +// CHECK: [[X:%.*]] = alloca i32, align 4 +// CHECK: [[ERROR:%.*]] = alloca float*, align 4 +// CHECK: [[TEMP:%.*]] = alloca swifterror float*, align 4 +// CHECK: [[T0:%.*]] = load float*, float** [[ERROR]], align 4 +// CHECK: store float* [[T0]], float** [[TEMP]], align 4 +// CHECK: call [[SWIFTCC:cc16]] void @context_error_1(i32* swiftself [[X]], float** swifterror [[TEMP]]) +// CHECK: [[T0:%.*]] = load float*, float** [[TEMP]], align 4 +// CHECK: store float* [[T0]], float** [[ERROR]], align 4 + +SWIFTCALL void context_error_2(short s, CONTEXT int *self, ERROR float **error) {} +// CHECK-LABEL: define {{.*}} void @context_error_2(i16{{.*}}, i32* swiftself{{.*}}, float** swifterror) + +/*****************************************************************************/ +/********************************** LOWERING *********************************/ +/*****************************************************************************/ + +typedef float float4 __attribute__((ext_vector_type(4))); +typedef float float8 __attribute__((ext_vector_type(8))); +typedef double double2 __attribute__((ext_vector_type(2))); +typedef double double4 __attribute__((ext_vector_type(4))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef int int5 __attribute__((ext_vector_type(5))); +typedef int int8 __attribute__((ext_vector_type(8))); + +#define TEST(TYPE) \ + SWIFTCALL TYPE return_##TYPE(void) { \ + TYPE result = {}; \ + return result; \ + } \ + SWIFTCALL void take_##TYPE(TYPE v) { \ + } \ + void test_##TYPE() { \ + take_##TYPE(return_##TYPE()); \ + } + +/*****************************************************************************/ +/*********************************** STRUCTS *********************************/ +/*****************************************************************************/ + +typedef struct { +} struct_empty; +TEST(struct_empty); +// CHECK-LABEL: define {{.*}} @return_struct_empty() +// CHECK: ret void +// CHECK-LABEL: define {{.*}} @take_struct_empty() +// CHECK: ret void + +typedef struct { + int x; + char c0; + char c1; + float f0; + float f1; +} 
struct_1; +TEST(struct_1); +// CHECK-LABEL: define {{.*}} @return_struct_1() +// CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 +// CHECK: [[VAR:%.*]] = alloca [[REC]], align 4 +// CHECK: @llvm.memset +// CHECK: @llvm.memcpy +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i16, \[2 x i8\], float, float }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i16, i16* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ i32, i16, float, float }]] undef, i32 [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], i16 [[SECOND]], 1 +// CHECK: [[T2:%.*]] = insertvalue [[UAGG]] [[T1]], float [[THIRD]], 2 +// CHECK: [[T3:%.*]] = insertvalue [[UAGG]] [[T2]], float [[FOURTH]], 3 +// CHECK: ret [[UAGG]] [[T3]] +// CHECK-LABEL: define {{.*}} @take_struct_1(i32, i16, float, float) +// CHECK: [[V:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store i32 %0, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: store i16 %1, i16* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: store float %2, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: store float %3, float* [[T0]], align 4 +// CHECK: ret void +// CHECK-LABEL: define void @test_struct_1() +// CHECK: [[TMP:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_struct_1() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store i32 [[T1]], i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store i16 [[T1]], i16* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 2 +// CHECK: store float [[T1]], float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 3 +// CHECK: store float [[T1]], float* [[T0]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i16, i16* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// 
CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align 4 +// CHECK: call [[SWIFTCC]] void @take_struct_1(i32 [[FIRST]], i16 [[SECOND]], float [[THIRD]], float [[FOURTH]]) +// CHECK: ret void + +typedef struct { + int x; + char c0; + __attribute__((aligned(2))) char c1; + float f0; + float f1; +} struct_2; +TEST(struct_2); +// CHECK-LABEL: define {{.*}} @return_struct_2() +// CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 +// CHECK: [[VAR:%.*]] = alloca [[REC]], align 4 +// CHECK: @llvm.memcpy +// CHECK: @llvm.memcpy +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i32, float, float }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ i32, i32, float, float }]] undef, i32 [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], i32 [[SECOND]], 1 +// CHECK: [[T2:%.*]] = insertvalue [[UAGG]] [[T1]], float [[THIRD]], 2 +// CHECK: [[T3:%.*]] = insertvalue [[UAGG]] [[T2]], float [[FOURTH]], 3 +// CHECK: ret [[UAGG]] [[T3]] +// CHECK-LABEL: define {{.*}} @take_struct_2(i32, i32, float, float) +// CHECK: [[V:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store i32 %0, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: store i32 %1, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: store float %2, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: store float %3, float* [[T0]], align 4 +// CHECK: ret void +// CHECK-LABEL: define void @test_struct_2() +// CHECK: [[TMP:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_struct_2() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store i32 [[T1]], i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store i32 [[T1]], i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 2 +// CHECK: store float [[T1]], float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 3 +// CHECK: store float [[T1]], float* [[T0]], align 4 +// CHECK: [[CAST_TMP:%.*]] 
= bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align 4 +// CHECK: call [[SWIFTCC]] void @take_struct_2(i32 [[FIRST]], i32 [[SECOND]], float [[THIRD]], float [[FOURTH]]) +// CHECK: ret void + +// There's no way to put a field randomly in the middle of an otherwise +// empty storage unit in C, so that case has to be tested in C++, which +// can use empty structs to introduce arbitrary padding. (In C, they end up +// with size 0 and so don't affect layout.) + +// Misaligned data rule. +typedef struct { + char c0; + __attribute__((packed)) float f; +} struct_misaligned_1; +TEST(struct_misaligned_1) +// CHECK-LABEL: define {{.*}} @return_struct_misaligned_1() +// CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align +// CHECK: [[VAR:%.*]] = alloca [[REC]], align +// CHECK: @llvm.memset +// CHECK: @llvm.memcpy +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i8 }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i8, i8* [[T0]], align +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ i32, i8 }]] undef, i32 [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], i8 [[SECOND]], 1 +// CHECK: ret [[UAGG]] [[T1]] +// CHECK-LABEL: define {{.*}} @take_struct_misaligned_1(i32, i8) +// CHECK: [[V:%.*]] = alloca [[REC]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store i32 %0, i32* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: store i8 %1, i8* [[T0]], align +// CHECK: ret void + +// Too many scalars. +typedef struct { + int x[5]; +} struct_big_1; +TEST(struct_big_1) + +// CHECK-LABEL: define {{.*}} void @return_struct_big_1({{.*}} noalias sret + +// Should not be byval. 
+// CHECK-LABEL: define {{.*}} void @take_struct_big_1({{.*}}*{{( %.*)?}}) + +/*****************************************************************************/ +/********************************* TYPE MERGING ******************************/ +/*****************************************************************************/ + +typedef union { + float f; + double d; +} union_het_fp; +TEST(union_het_fp) +// CHECK-LABEL: define {{.*}} @return_union_het_fp() +// CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 +// CHECK: [[VAR:%.*]] = alloca [[REC]], align 4 +// CHECK: @llvm.memcpy +// CHECK: @llvm.memcpy +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, i32 }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ i32, i32 }]] undef, i32 [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], i32 [[SECOND]], 1 +// CHECK: ret [[UAGG]] [[T1]] +// CHECK-LABEL: define {{.*}} @take_union_het_fp(i32, i32) +// CHECK: [[V:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store i32 %0, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: store i32 %1, i32* [[T0]], align 4 +// CHECK: ret void +// CHECK-LABEL: define void @test_union_het_fp() +// CHECK: [[TMP:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_union_het_fp() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store i32 [[T1]], i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store i32 [[T1]], i32* [[T0]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: call [[SWIFTCC]] void @take_union_het_fp(i32 [[FIRST]], i32 [[SECOND]]) +// CHECK: ret void + + +typedef union { + float f1; + float f2; +} union_hom_fp; +TEST(union_hom_fp) +// CHECK-LABEL: define void @test_union_hom_fp() +// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 4 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] float @return_union_hom_fp() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ float }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store float [[CALL]], float* [[T0]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load float, float* [[T0]], align 4 +// CHECK: call [[SWIFTCC]] void @take_union_hom_fp(float [[FIRST]]) +// CHECK: ret void + +typedef 
union { + float f1; + float4 fv2; +} union_hom_fp_partial; +TEST(union_hom_fp_partial) +// CHECK-LABEL: define void @test_union_hom_fp_partial() +// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 16 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ float, float, float, float }]] @return_union_hom_fp_partial() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ float, float, float, float }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store float [[T1]], float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store float [[T1]], float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 2 +// CHECK: store float [[T1]], float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 3 +// CHECK: store float [[T1]], float* [[T0]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align +// CHECK: call [[SWIFTCC]] void @take_union_hom_fp_partial(float [[FIRST]], float [[SECOND]], float [[THIRD]], float [[FOURTH]]) +// CHECK: ret void + +typedef union { + struct { int x, y; } f1; + float4 fv2; +} union_het_fpv_partial; +TEST(union_het_fpv_partial) +// CHECK-LABEL: define void @test_union_het_fpv_partial() +// CHECK: [[TMP:%.*]] = alloca [[REC:%.*]], align 16 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ i32, i32, float, float }]] @return_union_het_fpv_partial() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ i32, i32, float, float }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store i32 [[T1]], i32* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store i32 [[T1]], i32* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 2 +// CHECK: store float [[T1]], float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 3 +// CHECK: store float [[T1]], float* [[T0]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], 
[[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 3 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align +// CHECK: call [[SWIFTCC]] void @take_union_het_fpv_partial(i32 [[FIRST]], i32 [[SECOND]], float [[THIRD]], float [[FOURTH]]) +// CHECK: ret void + +/*****************************************************************************/ +/****************************** VECTOR LEGALIZATION **************************/ +/*****************************************************************************/ + +TEST(int4) +// CHECK-LABEL: define {{.*}} <4 x i32> @return_int4() +// CHECK-LABEL: define {{.*}} @take_int4(<4 x i32> + +TEST(int8) +// CHECK-LABEL: define {{.*}} @return_int8() +// CHECK: [[RET:%.*]] = alloca [[REC:<8 x i32>]], align 32 +// CHECK: [[VAR:%.*]] = alloca [[REC]], align +// CHECK: store +// CHECK: load +// CHECK: store +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ <4 x i32>, <4 x i32> }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load <4 x i32>, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load <4 x i32>, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ <4 x i32>, <4 x i32> }]] undef, <4 x i32> [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], <4 x i32> [[SECOND]], 1 +// CHECK: ret [[UAGG]] [[T1]] +// CHECK-LABEL: define {{.*}} @take_int8(<4 x i32>, <4 x i32>) +// CHECK: [[V:%.*]] = alloca [[REC]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store <4 x i32> %0, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: store <4 x i32> %1, <4 x i32>* [[T0]], align +// CHECK: ret void +// CHECK-LABEL: define void @test_int8() +// CHECK: [[TMP1:%.*]] = alloca [[REC]], align +// CHECK: [[TMP2:%.*]] = alloca [[REC]], align +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_int8() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP1]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store <4 x i32> [[T1]], <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store <4 x i32> [[T1]], <4 x i32>* [[T0]], align +// CHECK: [[V:%.*]] = load [[REC]], [[REC]]* [[TMP1]], align +// CHECK: store [[REC]] [[V]], [[REC]]* [[TMP2]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP2]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load <4 x i32>, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load <4 x i32>, <4 x i32>* [[T0]], align +// CHECK: call [[SWIFTCC]] void @take_int8(<4 x i32> [[FIRST]], <4 x i32> [[SECOND]]) +// CHECK: ret void + 
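The int8 checks above and the int5 case that follows exercise vector legalization: a vector wider than the largest legal vector type is split into legal pieces whose sizes add up to the original, so 8 x i32 is passed as two <4 x i32> values and 5 x i32 as a <4 x i32> plus a trailing i32. As a rough standalone illustration of that splitting rule only (the helper name is hypothetical, not a Clang API, and it assumes, as these ARM checks suggest, that <4 x i32> is the widest legal integer vector), a greedy version looks like this:

#include <cstdio>
#include <vector>

// Illustrative sketch: peel off the largest power-of-two piece that still
// fits, until the whole element count is covered.
struct Piece { unsigned NumElts; }; // NumElts == 1 stands for a bare i32

static std::vector<Piece> legalizeIntVector(unsigned numElts,
                                            unsigned maxLegalElts = 4) {
  std::vector<Piece> pieces;
  while (numElts > 0) {
    unsigned take = maxLegalElts;
    while (take > numElts)
      take /= 2;                // 8 -> 4 + 4, 5 -> 4 + 1
    pieces.push_back({take});
    numElts -= take;
  }
  return pieces;
}

int main() {
  for (const Piece &p : legalizeIntVector(5)) {
    if (p.NumElts > 1)
      std::printf("<%u x i32> ", p.NumElts);
    else
      std::printf("i32 ");
  }
  std::printf("\n"); // prints "<4 x i32> i32", matching the take_int5 checks below
}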
+TEST(int5) +// CHECK-LABEL: define {{.*}} @return_int5() +// CHECK: [[RET:%.*]] = alloca [[REC:<5 x i32>]], align 32 +// CHECK: [[VAR:%.*]] = alloca [[REC]], align +// CHECK: store +// CHECK: load +// CHECK: store +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ <4 x i32>, i32 }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load <4 x i32>, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ <4 x i32>, i32 }]] undef, <4 x i32> [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], i32 [[SECOND]], 1 +// CHECK: ret [[UAGG]] [[T1]] +// CHECK-LABEL: define {{.*}} @take_int5(<4 x i32>, i32) +// CHECK: [[V:%.*]] = alloca [[REC]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store <4 x i32> %0, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: store i32 %1, i32* [[T0]], align +// CHECK: ret void +// CHECK-LABEL: define void @test_int5() +// CHECK: [[TMP1:%.*]] = alloca [[REC]], align +// CHECK: [[TMP2:%.*]] = alloca [[REC]], align +// CHECK: [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG]] @return_int5() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP1]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store <4 x i32> [[T1]], <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store i32 [[T1]], i32* [[T0]], align +// CHECK: [[V:%.*]] = load [[REC]], [[REC]]* [[TMP1]], align +// CHECK: store [[REC]] [[V]], [[REC]]* [[TMP2]], align +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP2]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load <4 x i32>, <4 x i32>* [[T0]], align +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 1 +// CHECK: [[SECOND:%.*]] = load i32, i32* [[T0]], align +// CHECK: call [[SWIFTCC]] void @take_int5(<4 x i32> [[FIRST]], i32 [[SECOND]]) +// CHECK: ret void diff --git a/clang/test/CodeGenCXX/arm-swiftcall.cpp b/clang/test/CodeGenCXX/arm-swiftcall.cpp new file mode 100644 index 0000000000000..d67a9a0282d6f --- /dev/null +++ b/clang/test/CodeGenCXX/arm-swiftcall.cpp @@ -0,0 +1,115 @@ +// RUN: %clang_cc1 -triple armv7-apple-darwin9 -emit-llvm -o - %s -Wno-return-type-c-linkage | FileCheck %s + +// This isn't really testing anything ARM-specific; it's just a convenient +// 32-bit platform. 
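One point worth making explicit before the macros and tests: this file is C++ rather than C because a C++ empty struct has size one, so an array of empty structs really does occupy bytes and shifts the fields that follow it, which is what the struct tests below rely on. A minimal standalone illustration (hypothetical type names, the usual Itanium-style record layout assumed):

#include <cstddef>

struct empty {};              // size 1 in C++; size 0 only as a GNU C extension
struct padded {
  int x;                      // bytes 0..3
  empty pad[2];               // bytes 4..5: real storage in C++
  char c;                     // lands at offset 6, not 4
};

static_assert(sizeof(empty) == 1, "C++ empty structs are one byte");
static_assert(offsetof(padded, c) == 6, "the empty array pushes 'c' back");

That offset of 6 is why struct_1 below lowers with a [2 x i8] padding slot ahead of the i8 in its coerced type.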
+ +#define SWIFTCALL __attribute__((swiftcall)) +#define OUT __attribute__((swift_indirect_result)) +#define ERROR __attribute__((swift_error_result)) +#define CONTEXT __attribute__((swift_context)) + +/*****************************************************************************/ +/********************************** LOWERING *********************************/ +/*****************************************************************************/ + +#define TEST(TYPE) \ + extern "C" SWIFTCALL TYPE return_##TYPE(void) { \ + TYPE result = {}; \ + return result; \ + } \ + extern "C" SWIFTCALL void take_##TYPE(TYPE v) { \ + } \ + extern "C" void test_##TYPE() { \ + take_##TYPE(return_##TYPE()); \ + } + +/*****************************************************************************/ +/*********************************** STRUCTS *********************************/ +/*****************************************************************************/ + +typedef struct { +} struct_empty; +TEST(struct_empty); +// CHECK-LABEL: define {{.*}} @return_struct_empty() +// CHECK: ret void +// CHECK-LABEL: define {{.*}} @take_struct_empty() +// CHECK: ret void + +// This is only properly testable in C++ because it relies on empty structs +// actually taking up space in a structure without requiring any extra data +// to be passed. +typedef struct { + int x; + struct_empty padding[2]; + char c1; + float f0; + float f1; +} struct_1; +TEST(struct_1); +// CHECK-LABEL: define {{.*}} @return_struct_1() +// CHECK: [[RET:%.*]] = alloca [[REC:%.*]], align 4 +// CHECK: @llvm.memset +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[RET]] to [[AGG:{ i32, \[2 x i8\], i8, \[1 x i8\], float, float }]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[SECOND:%.*]] = load i8, i8* [[T0]], align 2 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 5 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = insertvalue [[UAGG:{ i32, i8, float, float }]] undef, i32 [[FIRST]], 0 +// CHECK: [[T1:%.*]] = insertvalue [[UAGG]] [[T0]], i8 [[SECOND]], 1 +// CHECK: [[T2:%.*]] = insertvalue [[UAGG]] [[T1]], float [[THIRD]], 2 +// CHECK: [[T3:%.*]] = insertvalue [[UAGG]] [[T2]], float [[FOURTH]], 3 +// CHECK: ret [[UAGG]] [[T3]] +// CHECK-LABEL: define {{.*}} @take_struct_1(i32, i8, float, float) +// CHECK: [[V:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[V]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: store i32 %0, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: store i8 %1, i8* [[T0]], align 2 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: store float %2, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 5 +// CHECK: store float %3, float* [[T0]], align 4 +// CHECK: ret void +// CHECK-LABEL: define void @test_struct_1() +// CHECK: [[TMP:%.*]] = alloca [[REC]], align 4 +// CHECK: [[CALL:%.*]] = call [[SWIFTCC:cc16]] [[UAGG]] 
@return_struct_1() +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 0 +// CHECK: store i32 [[T1]], i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 1 +// CHECK: store i8 [[T1]], i8* [[T0]], align 2 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 2 +// CHECK: store float [[T1]], float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 5 +// CHECK: [[T1:%.*]] = extractvalue [[UAGG]] [[CALL]], 3 +// CHECK: store float [[T1]], float* [[T0]], align 4 +// CHECK: [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG]]* +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0 +// CHECK: [[FIRST:%.*]] = load i32, i32* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 2 +// CHECK: [[SECOND:%.*]] = load i8, i8* [[T0]], align 2 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 4 +// CHECK: [[THIRD:%.*]] = load float, float* [[T0]], align 4 +// CHECK: [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 5 +// CHECK: [[FOURTH:%.*]] = load float, float* [[T0]], align 4 +// CHECK: call [[SWIFTCC]] void @take_struct_1(i32 [[FIRST]], i8 [[SECOND]], float [[THIRD]], float [[FOURTH]]) +// CHECK: ret void + +struct struct_indirect_1 { + int x; + ~struct_indirect_1(); +}; +TEST(struct_indirect_1) + +// CHECK-LABEL: define {{.*}} void @return_struct_indirect_1({{.*}} noalias sret + +// Should not be byval. +// CHECK-LABEL: define {{.*}} void @take_struct_indirect_1({{.*}}*{{( %.*)?}})
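The struct_indirect_1 case closes the file with the C++-only rule these two CHECK lines encode: a record made non-trivial by its user-declared destructor is never expanded into scalars; it is returned through an sret pointer and taken as a plain pointer argument rather than byval. A rough approximation of that expectation, stated with standard type traits (an illustrative sketch only, not the predicate Clang itself uses):

#include <type_traits>

// Sketch: approximate "must go indirect" as "not trivially copyable and
// destructible". The real lowering has more inputs; this just mirrors what
// the struct_indirect_1 checks above demonstrate.
template <class Record>
constexpr bool expectIndirectSwiftPassing() {
  return !(std::is_trivially_copy_constructible<Record>::value &&
           std::is_trivially_destructible<Record>::value);
}

struct like_struct_indirect_1 {
  int x;
  ~like_struct_indirect_1();  // user-declared dtor, as in struct_indirect_1
};

static_assert(expectIndirectSwiftPassing<like_struct_indirect_1>(),
              "a non-trivial destructor forces the indirect path");
static_assert(!expectIndirectSwiftPassing<int>(),
              "trivial types stay eligible for the direct/expanded path");

int main() {}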