diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 56198385de9dc..d35ce0409d723 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -197,11 +197,11 @@ namespace {
     llvm::Value *getScalarRValValueOrNull(RValue RVal) const;
 
     /// Converts an rvalue to integer value if needed.
-    llvm::Value *convertRValueToInt(RValue RVal, bool CastFP = true) const;
+    llvm::Value *convertRValueToInt(RValue RVal, bool CmpXchg = false) const;
 
     RValue ConvertToValueOrAtomic(llvm::Value *IntVal, AggValueSlot ResultSlot,
                                   SourceLocation Loc, bool AsValue,
-                                  bool CastFP = true) const;
+                                  bool CmpXchg = false) const;
 
     /// Copy an atomic r-value into atomic-layout memory.
     void emitCopyIntoMemory(RValue rvalue) const;
@@ -264,7 +264,7 @@ namespace {
                                 llvm::AtomicOrdering AO, bool IsVolatile);
     /// Emits atomic load as LLVM instruction.
     llvm::Value *EmitAtomicLoadOp(llvm::AtomicOrdering AO, bool IsVolatile,
-                                  bool CastFP = true);
+                                  bool CmpXchg = false);
     /// Emits atomic compare-and-exchange op as a libcall.
     llvm::Value *EmitAtomicCompareExchangeLibcall(
         llvm::Value *ExpectedAddr, llvm::Value *DesiredAddr,
@@ -1401,13 +1401,26 @@ RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
       LVal.getBaseInfo(), TBAAAccessInfo()));
 }
 
+/// Return true if \p ValTy is a type that should be cast to an integer
+/// around the atomic memory operation. If \p CmpXchg is true, the cast of a
+/// floating point type is made because the cmpxchg instruction cannot have
+/// floating point operands. TODO: Allow compare-and-exchange and FP; see
+/// comment in AtomicExpandPass.cpp.
+static bool shouldCastToInt(llvm::Type *ValTy, bool CmpXchg) {
+  if (ValTy->isFloatingPointTy())
+    return ValTy->isX86_FP80Ty() || CmpXchg;
+  return !ValTy->isIntegerTy() && !ValTy->isPointerTy();
+}
+
 RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
                                           AggValueSlot ResultSlot,
                                           SourceLocation Loc, bool AsValue,
-                                          bool CastFP) const {
+                                          bool CmpXchg) const {
   // Try not to in some easy cases.
-  assert((Val->getType()->isIntegerTy() || Val->getType()->isIEEELikeFPTy()) &&
-         "Expected integer or floating point value");
+  assert((Val->getType()->isIntegerTy() || Val->getType()->isPointerTy() ||
+          Val->getType()->isIEEELikeFPTy()) &&
+         "Expected integer, pointer or floating point value when converting "
+         "result.");
   if (getEvaluationKind() == TEK_Scalar &&
       (((!LVal.isBitField() ||
          LVal.getBitFieldInfo().Size == ValueSizeInBits) &&
@@ -1416,13 +1429,12 @@ RValue AtomicInfo::ConvertToValueOrAtomic(llvm::Value *Val,
     auto *ValTy = AsValue
                       ? CGF.ConvertTypeForMem(ValueTy)
                       : getAtomicAddress().getElementType();
-    if (ValTy->isIntegerTy() || (!CastFP && ValTy->isIEEELikeFPTy())) {
+    if (!shouldCastToInt(ValTy, CmpXchg)) {
       assert((!ValTy->isIntegerTy() || Val->getType() == ValTy) &&
              "Different integer types.");
       return RValue::get(CGF.EmitFromMemory(Val, ValueTy));
-    } else if (ValTy->isPointerTy())
-      return RValue::get(CGF.Builder.CreateIntToPtr(Val, ValTy));
-    else if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
+    }
+    if (llvm::CastInst::isBitCastable(Val->getType(), ValTy))
       return RValue::get(CGF.Builder.CreateBitCast(Val, ValTy));
   }
 
@@ -1459,10 +1471,10 @@ void AtomicInfo::EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
 }
 
 llvm::Value *AtomicInfo::EmitAtomicLoadOp(llvm::AtomicOrdering AO,
-                                          bool IsVolatile, bool CastFP) {
+                                          bool IsVolatile, bool CmpXchg) {
   // Okay, we're doing this natively.
   Address Addr = getAtomicAddress();
-  if (!(Addr.getElementType()->isIEEELikeFPTy() && !CastFP))
+  if (shouldCastToInt(Addr.getElementType(), CmpXchg))
     Addr = castToAtomicIntPointer(Addr);
   llvm::LoadInst *Load = CGF.Builder.CreateLoad(Addr, "atomic-load");
   Load->setAtomic(AO);
@@ -1523,7 +1535,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
   }
 
   // Okay, we're doing this natively.
-  auto *Load = EmitAtomicLoadOp(AO, IsVolatile, /*CastFP=*/false);
+  auto *Load = EmitAtomicLoadOp(AO, IsVolatile);
 
   // If we're ignoring an aggregate return, don't do anything.
   if (getEvaluationKind() == TEK_Aggregate && ResultSlot.isIgnored())
@@ -1531,8 +1543,7 @@ RValue AtomicInfo::EmitAtomicLoad(AggValueSlot ResultSlot, SourceLocation Loc,
 
   // Okay, turn that back into the original value or atomic (for non-simple
   // lvalues) type.
-  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue,
-                                /*CastFP=*/false);
+  return ConvertToValueOrAtomic(Load, ResultSlot, Loc, AsValue);
 }
 
 /// Emit a load from an l-value of atomic type. Note that the r-value
@@ -1601,20 +1612,17 @@ llvm::Value *AtomicInfo::getScalarRValValueOrNull(RValue RVal) const {
   return nullptr;
 }
 
-llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CastFP) const {
+llvm::Value *AtomicInfo::convertRValueToInt(RValue RVal, bool CmpXchg) const {
   // If we've got a scalar value of the right size, try to avoid going
   // through memory. Floats get casted if needed by AtomicExpandPass.
   if (llvm::Value *Value = getScalarRValValueOrNull(RVal)) {
-    if (isa<llvm::IntegerType>(Value->getType()) ||
-        (!CastFP && Value->getType()->isIEEELikeFPTy()))
+    if (!shouldCastToInt(Value->getType(), CmpXchg))
      return CGF.EmitToMemory(Value, ValueTy);
     else {
       llvm::IntegerType *InputIntTy = llvm::IntegerType::get(
           CGF.getLLVMContext(),
           LVal.isSimple() ? getValueSizeInBits() : getAtomicSizeInBits());
-      if (isa<llvm::PointerType>(Value->getType()))
-        return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
-      else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
+      if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
         return CGF.Builder.CreateBitCast(Value, InputIntTy);
     }
   }
@@ -1687,13 +1695,14 @@ std::pair<RValue, llvm::Value *> AtomicInfo::EmitAtomicCompareExchange(
 
   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
-  auto *ExpectedVal = convertRValueToInt(Expected);
-  auto *DesiredVal = convertRValueToInt(Desired);
+  auto *ExpectedVal = convertRValueToInt(Expected, /*CmpXchg=*/true);
+  auto *DesiredVal = convertRValueToInt(Desired, /*CmpXchg=*/true);
   auto Res = EmitAtomicCompareExchangeOp(ExpectedVal, DesiredVal, Success,
                                          Failure, IsWeak);
   return std::make_pair(
       ConvertToValueOrAtomic(Res.first, AggValueSlot::ignored(),
-                             SourceLocation(), /*AsValue=*/false),
+                             SourceLocation(), /*AsValue=*/false,
+                             /*CmpXchg=*/true),
       Res.second);
 }
 
@@ -1787,7 +1796,7 @@ void AtomicInfo::EmitAtomicUpdateOp(
   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
 
   // Do the atomic load.
-  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
+  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
   // For non-simple lvalues perform compare-and-swap procedure.
   auto *ContBB = CGF.createBasicBlock("atomic_cont");
   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@@ -1803,7 +1812,8 @@ void AtomicInfo::EmitAtomicUpdateOp(
     CGF.Builder.CreateStore(PHI, NewAtomicIntAddr);
   }
   auto OldRVal = ConvertToValueOrAtomic(PHI, AggValueSlot::ignored(),
-                                        SourceLocation(), /*AsValue=*/false);
+                                        SourceLocation(), /*AsValue=*/false,
+                                        /*CmpXchg=*/true);
   EmitAtomicUpdateValue(CGF, *this, OldRVal, UpdateOp, NewAtomicAddr);
   auto *DesiredVal = CGF.Builder.CreateLoad(NewAtomicIntAddr);
   // Try to write new value using cmpxchg operation.
@@ -1869,7 +1879,7 @@ void AtomicInfo::EmitAtomicUpdateOp(llvm::AtomicOrdering AO, RValue UpdateRVal,
   auto Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
 
   // Do the atomic load.
-  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile);
+  auto *OldVal = EmitAtomicLoadOp(Failure, IsVolatile, /*CmpXchg=*/true);
   // For non-simple lvalues perform compare-and-swap procedure.
   auto *ContBB = CGF.createBasicBlock("atomic_cont");
   auto *ExitBB = CGF.createBasicBlock("atomic_exit");
@@ -1969,21 +1979,16 @@ void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
   }
 
   // Okay, we're doing this natively.
-  llvm::Value *ValToStore =
-      atomics.convertRValueToInt(rvalue, /*CastFP=*/false);
+  llvm::Value *ValToStore = atomics.convertRValueToInt(rvalue);
 
   // Do the atomic store.
   Address Addr = atomics.getAtomicAddress();
-  bool ShouldCastToInt = true;
   if (llvm::Value *Value = atomics.getScalarRValValueOrNull(rvalue))
-    if (isa<llvm::PointerType>(Value->getType()) ||
-        Value->getType()->isIEEELikeFPTy())
-      ShouldCastToInt = false;
-  if (ShouldCastToInt) {
-    Addr = atomics.castToAtomicIntPointer(Addr);
-    ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
-                                       /*isSigned=*/false);
-  }
+    if (shouldCastToInt(Value->getType(), /*CmpXchg=*/false)) {
+      Addr = atomics.castToAtomicIntPointer(Addr);
+      ValToStore = Builder.CreateIntCast(ValToStore, Addr.getElementType(),
+                                         /*isSigned=*/false);
+    }
   llvm::StoreInst *store = Builder.CreateStore(ValToStore, Addr);
 
   if (AO == llvm::AtomicOrdering::Acquire)
diff --git a/clang/test/CodeGen/atomic.c b/clang/test/CodeGen/atomic.c
index af5c056bbfe6e..16c29e282ddd9 100644
--- a/clang/test/CodeGen/atomic.c
+++ b/clang/test/CodeGen/atomic.c
@@ -1,10 +1,14 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=i686-apple-darwin9 | FileCheck %s --check-prefixes=CHECK,X86
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple=s390x-linux-gnu | FileCheck %s --check-prefixes=CHECK,SYSTEMZ
 
 // CHECK: @[[NONSTATIC_GLOB_POINTER_FROM_INT:.+]] = global ptr null
 // CHECK: @[[GLOB_POINTER:.+]] = internal global ptr null
 // CHECK: @[[GLOB_POINTER_FROM_INT:.+]] = internal global ptr null
 // CHECK: @[[GLOB_INT:.+]] = internal global i32 0
 // CHECK: @[[GLOB_FLT:.+]] = internal global float {{[0e\+-\.]+}}, align
+// CHECK: @[[GLOB_DBL:.+]] = internal global double {{[0e\+-\.]+}}, align
+// X86: @[[GLOB_LONGDBL:.+]] = internal global x86_fp80 {{[0xK]+}}, align
+// SYSTEMZ: @[[GLOB_LONGDBL:.+]] = internal global fp128 {{[0xL]+}}, align
 
 int atomic(void) {
   // non-sensical test for sync functions
@@ -79,8 +83,10 @@ int atomic(void) {
   // CHECK: atomicrmw nand ptr %valc, i8 6 seq_cst, align 1
 
   __sync_val_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
-  // CHECK: extractvalue { i32, i1 } [[PAIR]], 0
+  // X86: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // X86-NEXT: extractvalue { i32, i1 } [[PAIR]], 0
+  // SYSTEMZ: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
+  // SYSTEMZ-NEXT: extractvalue { i64, i1 } [[PAIR]], 0
 
   if ( __sync_val_compare_and_swap(&valb, 0, 1)) {
     // CHECK: [[PAIR:%[a-z0-9_.]+]] = cmpxchg ptr %valb, i8 0, i8 1 seq_cst seq_cst, align 1
@@ -90,13 +96,15 @@ int atomic(void) {
   }
 
   __sync_bool_compare_and_swap((void **)0, (void *)0, (void *)0);
-  // CHECK: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // X86: cmpxchg ptr null, i32 0, i32 0 seq_cst seq_cst, align 4
+  // SYSTEMZ: cmpxchg ptr null, i64 0, i64 0 seq_cst seq_cst, align 8
 
   __sync_lock_release(&val);
   // CHECK: store atomic i32 0, {{.*}} release, align 4
 
   __sync_lock_release(&ptrval);
-  // CHECK: store atomic i32 0, {{.*}} release, align 4
+  // X86: store atomic i32 0, {{.*}} release, align 4
+  // SYSTEMZ: store atomic i64 0, {{.*}} release, align 8
 
   __sync_synchronize ();
   // CHECK: fence seq_cst
@@ -131,19 +139,25 @@ static _Atomic(int *) glob_pointer_from_int = 0;
 _Atomic(int *) nonstatic_glob_pointer_from_int = 0LL;
 static _Atomic int glob_int = 0;
 static _Atomic float glob_flt = 0.0f;
+static _Atomic double glob_dbl = 0.0f;
+static _Atomic long double glob_longdbl = 0.0f;
 
 void force_global_uses(void) {
+  // X86: %atomic-temp = alloca x86_fp80, align 16
   (void)glob_pointer;
-  // CHECK: %[[LOCAL_INT:.+]] = load atomic i32, ptr @[[GLOB_POINTER]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT]] to ptr
+  // CHECK: load atomic ptr, ptr @[[GLOB_POINTER]] seq_cst
   (void)glob_pointer_from_int;
-  // CHECK: %[[LOCAL_INT_2:.+]] = load atomic i32, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_2]] to ptr
+  // CHECK-NEXT: load atomic ptr, ptr @[[GLOB_POINTER_FROM_INT]] seq_cst
   (void)nonstatic_glob_pointer_from_int;
-  // CHECK: %[[LOCAL_INT_3:.+]] = load atomic i32, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
-  // CHECK-NEXT: inttoptr i32 %[[LOCAL_INT_3]] to ptr
+  // CHECK-NEXT: load atomic ptr, ptr @[[NONSTATIC_GLOB_POINTER_FROM_INT]] seq_cst
   (void)glob_int;
-  // CHECK: load atomic i32, ptr @[[GLOB_INT]] seq_cst
+  // CHECK-NEXT: load atomic i32, ptr @[[GLOB_INT]] seq_cst
   (void)glob_flt;
-  // CHECK: load atomic float, ptr @[[GLOB_FLT]] seq_cst
+  // CHECK-NEXT: load atomic float, ptr @[[GLOB_FLT]] seq_cst
+  (void)glob_dbl;
+  // CHECK-NEXT: load atomic double, ptr @[[GLOB_DBL]] seq_cst
+  (void)glob_longdbl;
+  // X86: call void @__atomic_load(i32 noundef 16, ptr noundef @glob_longdbl, ptr noundef %atomic-temp
+  // X86-NEXT: %0 = load x86_fp80, ptr %atomic-temp, align 16
+  // SYSTEMZ: load atomic fp128, ptr @[[GLOB_LONGDBL]] seq_cst
 }
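
Note for reviewers: below is a minimal standalone reproducer, illustrative only and not part of the patch (the file and symbol names are made up), for the codegen change that the test updates above encode. Atomic loads of pointer types are now emitted on their natural type, while compare-and-exchange still round-trips through an integer type, matching the /*CmpXchg=*/true call sites and the TODO on shouldCastToInt().

    /* repro.c -- e.g. clang -cc1 -triple i686-apple-darwin9 -emit-llvm repro.c -o - */
    #include <stdatomic.h>

    _Atomic(int *) gp;
    _Atomic float gf;

    int *load_ptr(void) {
      /* With this patch: "load atomic ptr, ptr @gp seq_cst".
         Before it: "load atomic i32, ptr @gp seq_cst" followed by
         "inttoptr ... to ptr", as in the removed CHECK lines above. */
      return gp;
    }

    _Bool cas_float(float expected, float desired) {
      /* Still lowered through an i32 cmpxchg plus bitcasts, because the
         cmpxchg instruction cannot take floating point operands; see the
         TODO on shouldCastToInt() in CGAtomic.cpp. */
      return atomic_compare_exchange_strong(&gf, &expected, desired);
    }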