diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 438b6ff55c85f..291588124dccd 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5125,14 +5125,6 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
   // Inline Asm Support hooks
   //
 
-  /// This hook allows the target to expand an inline asm call to be explicit
-  /// llvm code if it wants to. This is useful for turning simple inline asms
-  /// into LLVM intrinsics, which gives the compiler more information about the
-  /// behavior of the code.
-  virtual bool ExpandInlineAsm(CallInst *) const {
-    return false;
-  }
-
   enum ConstraintType {
     C_Register,            // Constraint represents specific register(s).
    C_RegisterClass,       // Constraint represents any of register(s) in class.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 0e40a92fd8d64..9db4c9e5e2807 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2618,22 +2618,9 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
 bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
   BasicBlock *BB = CI->getParent();
 
-  // Lower inline assembly if we can.
-  // If we found an inline asm expession, and if the target knows how to
-  // lower it to normal LLVM code, do so now.
-  if (CI->isInlineAsm()) {
-    if (TLI->ExpandInlineAsm(CI)) {
-      // Avoid invalidating the iterator.
-      CurInstIterator = BB->begin();
-      // Avoid processing instructions out of order, which could cause
-      // reuse before a value is defined.
-      SunkAddrs.clear();
-      return true;
-    }
-    // Sink address computing for memory operands into the block.
-    if (optimizeInlineAsmInst(CI))
-      return true;
-  }
+  // Sink address computing for memory operands into the block.
+  if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
+    return true;
 
   // Align the pointer arguments to this call if the target thinks it's a good
   // idea
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 22ba30b734a16..5ac09e2f9fb1c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20192,37 +20192,6 @@ bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
 // ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
-bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
-  // Looking for "rev" which is V6+.
-  if (!Subtarget->hasV6Ops())
-    return false;
-
-  InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
-  StringRef AsmStr = IA->getAsmString();
-  SmallVector<StringRef, 4> AsmPieces;
-  SplitString(AsmStr, AsmPieces, ";\n");
-
-  switch (AsmPieces.size()) {
-  default: return false;
-  case 1:
-    AsmStr = AsmPieces[0];
-    AsmPieces.clear();
-    SplitString(AsmStr, AsmPieces, " \t,");
-
-    // rev $0, $1
-    if (AsmPieces.size() == 3 && AsmPieces[0] == "rev" &&
-        AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
-        IA->getConstraintString().starts_with("=l,l")) {
-      IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
-      if (Ty && Ty->getBitWidth() == 32)
-        return IntrinsicLowering::LowerToByteSwap(CI);
-    }
-    break;
-  }
-
-  return false;
-}
-
 const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
   // At this point, we have to lower this constraint to something else, so we
   // lower it to an "r" or "w". However, by doing this we will force the result
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 196ecb1b9f678..955e47bf033fc 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -534,8 +534,6 @@ class VectorType;
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const override;
 
-    bool ExpandInlineAsm(CallInst *CI) const override;
-
     ConstraintType getConstraintType(StringRef Constraint) const override;
 
     /// Examine constraint string and operand type and determine a weight value.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 572cfdad3c93b..0110b1ed23656 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -61005,117 +61005,6 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
 // X86 Inline Assembly Support
 //===----------------------------------------------------------------------===//
 
-// Helper to match a string separated by whitespace.
-static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
-  S = S.substr(S.find_first_not_of(" \t")); // Skip leading whitespace.
-
-  for (StringRef Piece : Pieces) {
-    if (!S.starts_with(Piece)) // Check if the piece matches.
-      return false;
-
-    S = S.substr(Piece.size());
-    StringRef::size_type Pos = S.find_first_not_of(" \t");
-    if (Pos == 0) // We matched a prefix.
-      return false;
-
-    S = S.substr(Pos);
-  }
-
-  return S.empty();
-}
-
-static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
-
-  if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
-    if (llvm::is_contained(AsmPieces, "~{cc}") &&
-        llvm::is_contained(AsmPieces, "~{flags}") &&
-        llvm::is_contained(AsmPieces, "~{fpsr}")) {
-
-      if (AsmPieces.size() == 3)
-        return true;
-      else if (llvm::is_contained(AsmPieces, "~{dirflag}"))
-        return true;
-    }
-  }
-  return false;
-}
-
-bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
-  InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
-
-  StringRef AsmStr = IA->getAsmString();
-
-  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
-  if (!Ty || Ty->getBitWidth() % 16 != 0)
-    return false;
-
-  // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
-  SmallVector<StringRef, 4> AsmPieces;
-  SplitString(AsmStr, AsmPieces, ";\n");
-
-  switch (AsmPieces.size()) {
-  default: return false;
-  case 1:
-    // FIXME: this should verify that we are targeting a 486 or better. If not,
-    // we will turn this bswap into something that will be lowered to logical
-    // ops instead of emitting the bswap asm. For now, we don't support 486 or
-    // lower so don't worry about this.
-    // bswap $0
-    if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
-        matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
-        matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
-        matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
-        matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
-        matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
-      // No need to check constraints, nothing other than the equivalent of
-      // "=r,0" would be valid here.
-      return IntrinsicLowering::LowerToByteSwap(CI);
-    }
-
-    // rorw $$8, ${0:w} --> llvm.bswap.i16
-    if (CI->getType()->isIntegerTy(16) &&
-        IA->getConstraintString().starts_with("=r,0,") &&
-        (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
-         matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
-      AsmPieces.clear();
-      StringRef ConstraintsStr = IA->getConstraintString();
-      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
-      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
-      if (clobbersFlagRegisters(AsmPieces))
-        return IntrinsicLowering::LowerToByteSwap(CI);
-    }
-    break;
-  case 3:
-    if (CI->getType()->isIntegerTy(32) &&
-        IA->getConstraintString().starts_with("=r,0,") &&
-        matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
-        matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
-        matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
-      AsmPieces.clear();
-      StringRef ConstraintsStr = IA->getConstraintString();
-      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
-      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
-      if (clobbersFlagRegisters(AsmPieces))
-        return IntrinsicLowering::LowerToByteSwap(CI);
-    }
-
-    if (CI->getType()->isIntegerTy(64)) {
-      InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
-      if (Constraints.size() >= 2 &&
-          Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
-          Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
-        // bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
-        if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
-            matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
-            matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
-          return IntrinsicLowering::LowerToByteSwap(CI);
-      }
-    }
-    break;
-  }
-  return false;
-}
-
 static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
   X86::CondCode Cond = StringSwitch<X86::CondCode>(Constraint)
                            .Case("{@cca}", X86::COND_A)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index d888f9f593ee7..0c9ba591b03eb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1364,8 +1364,6 @@ namespace llvm {
 
     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
 
-    bool ExpandInlineAsm(CallInst *CI) const override;
-
     ConstraintType getConstraintType(StringRef Constraint) const override;
 
     /// Examine constraint string and operand type and determine a weight value.
diff --git a/llvm/test/CodeGen/ARM/bswap-inline-asm.ll b/llvm/test/CodeGen/ARM/bswap-inline-asm.ll
index 31f9d729cf6e6..cc92a9710c98a 100644
--- a/llvm/test/CodeGen/ARM/bswap-inline-asm.ll
+++ b/llvm/test/CodeGen/ARM/bswap-inline-asm.ll
@@ -1,8 +1,10 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 | FileCheck %s
 
+; rev inline assembly should be preserved as-is.
+
 define i32 @t1(i32 %x) nounwind {
 ; CHECK-LABEL: t1:
-; CHECK-NOT: InlineAsm
+; CHECK: InlineAsm
 ; CHECK: rev
   %asmtmp = tail call i32 asm "rev $0, $1\0A", "=l,l"(i32 %x) nounwind
   ret i32 %asmtmp
diff --git a/llvm/test/CodeGen/X86/bswap-inline-asm.ll b/llvm/test/CodeGen/X86/bswap-inline-asm.ll
index e5ffec7801fc5..a9ce616b7eccc 100644
--- a/llvm/test/CodeGen/X86/bswap-inline-asm.ll
+++ b/llvm/test/CodeGen/X86/bswap-inline-asm.ll
@@ -1,11 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 
+; bswap inline assembly should be preserved as-is.
+
 define i64 @foo(i64 %x) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapq %rax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i64 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
   ret i64 %asmtmp
@@ -15,7 +19,9 @@ define i64 @bar(i64 %x) nounwind {
 ; CHECK-LABEL: bar:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapq %rax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i64 asm "bswapq ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
   ret i64 %asmtmp
@@ -25,16 +31,20 @@ define i32 @pen(i32 %x) nounwind {
 ; CHECK-LABEL: pen:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
-  %asmtmp = tail call i32 asm "bswapl ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
+  %asmtmp = tail call i32 asm "bswapl ${0:k}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
   ret i32 %asmtmp
 }
 
 define zeroext i16 @s16(i16 zeroext %x) nounwind {
 ; CHECK-LABEL: s16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    rolw $8, %di
+; CHECK-NEXT:    ## InlineAsm Start
+; CHECK-NEXT:    rorw $8, %di
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    movzwl %di, %eax
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
@@ -44,7 +54,9 @@ define zeroext i16 @s16(i16 zeroext %x) nounwind {
 define zeroext i16 @t16(i16 zeroext %x) nounwind {
 ; CHECK-LABEL: t16:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    rolw $8, %di
+; CHECK-NEXT:    ## InlineAsm Start
+; CHECK-NEXT:    rorw $8, %di
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    movzwl %di, %eax
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i16 asm "rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
@@ -54,7 +66,9 @@ define zeroext i16 @t16(i16 zeroext %x) nounwind {
 define zeroext i16 @u16(i16 zeroext %x) nounwind {
 ; CHECK-LABEL: u16:
 ; CHECK:       ## %bb.0:
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    rolw $8, %di
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    movzwl %di, %eax
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i16 %x) nounwind
@@ -64,7 +78,9 @@ define zeroext i16 @u16(i16 zeroext %x) nounwind {
 define zeroext i16 @v16(i16 zeroext %x) nounwind {
 ; CHECK-LABEL: v16:
 ; CHECK:       ## %bb.0:
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    rolw $8, %di
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    movzwl %di, %eax
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i16 asm "rolw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{fpsr},~{flags}"(i16 %x) nounwind
@@ -75,7 +91,9 @@ define i32 @s32(i32 %x) nounwind {
 ; CHECK-LABEL: s32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 %x) nounwind
   ret i32 %asmtmp
@@ -85,7 +103,9 @@ define i32 @t32(i32 %x) nounwind {
 ; CHECK-LABEL: t32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i32 asm "bswap $0", "=r,0,~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
   ret i32 %asmtmp
@@ -95,7 +115,11 @@ define i32 @u32(i32 %x) nounwind {
 ; CHECK-LABEL: u32:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    ## InlineAsm Start
+; CHECK-NEXT:    rorw $8, %ax
+; CHECK-NEXT:    rorl $16, %eax
+; CHECK-NEXT:    rorw $8, %ax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i32 asm "rorw $$8, ${0:w};rorl $$16, $0;rorw $$8, ${0:w}", "=r,0,~{cc},~{dirflag},~{flags},~{fpsr}"(i32 %x) nounwind
   ret i32 %asmtmp
@@ -105,7 +129,9 @@ define i64 @s64(i64 %x) nounwind {
 ; CHECK-LABEL: s64:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapq %rax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{dirflag},~{fpsr},~{flags}"(i64 %x) nounwind
   ret i64 %asmtmp
@@ -115,7 +141,9 @@ define i64 @t64(i64 %x) nounwind {
 ; CHECK-LABEL: t64:
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    bswapq %rax
+; CHECK-NEXT:    ## InlineAsm End
 ; CHECK-NEXT:    retq
   %asmtmp = tail call i64 asm "bswap ${0:q}", "=r,0,~{fpsr},~{dirflag},~{flags}"(i64 %x) nounwind
   ret i64 %asmtmp
diff --git a/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll b/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll
index 57dccfc1b4a84..0538541a6f7ba 100644
--- a/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -18,9 +18,9 @@ define i64 @t(ptr %arg) nounwind {
   ret i64 0
 }
 
-; Make sure that we translate this to the bswap intrinsic which lowers down without the
-; inline assembly.
-; CHECK-NOT: #APP
+; Make sure this lowers to inline assembly and is not translated to an
+; intrinsic.
+; CHECK: #APP
 define i32 @s(i32 %argc, ptr nocapture %argv) unnamed_addr nounwind {
 entry:
   %0 = trunc i32 %argc to i16
diff --git a/llvm/test/CodeGen/X86/pr67333.ll b/llvm/test/CodeGen/X86/pr67333.ll
index cbb730857506d..accdd04f084df 100644
--- a/llvm/test/CodeGen/X86/pr67333.ll
+++ b/llvm/test/CodeGen/X86/pr67333.ll
@@ -7,8 +7,14 @@ declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0
 define void @SHA256_Compress_Generic(ptr noundef %ctx) #1 {
 ; CHECK-LABEL: SHA256_Compress_Generic:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movbel 0, %eax
-; CHECK-NEXT:    movbel 12(%rdi), %ecx
+; CHECK-NEXT:    movl 0, %eax
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    bswapl %eax
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    movl 12(%rdi), %ecx
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    bswapl %ecx
+; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    vmovd %eax, %xmm0
 ; CHECK-NEXT:    vmovdqa {{.*#+}} xmm1 = [128,128,128,128,0,1,2,3,128,128,128,128,128,128,128,128]
 ; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm2